From 7f8786d9aaa2a1926c84aff1ca838ec226c914ab Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Mon, 20 Mar 2023 22:08:24 +0100
Subject: [PATCH 001/296] [PT FE] Make NodeContext constant inside conversion
 rules (#16165)

* Make NodeContext constant inside conversion rules

* Use shared_ptr

* Fix ptr

* Fix logical not
---
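Note: the bulk of this patch is one mechanical change repeated across the
op/*.cpp converters below: every translate_* entry point now takes the node
context by const reference, and the context state that still has to change
during translation moved behind shared_ptr members so that const methods can
update it. A minimal sketch of the pattern, using translate_relu6 from this
patch as the example; template arguments are abbreviated here the same way
the hunks below abbreviate them, and the exact CreatorFunction signature is
assumed to be OutputVector(const NodeContext&):

    // Before: a conversion rule received a mutable context.
    OutputVector translate_relu6(NodeContext& context);

    // After: the context is read-only inside the rule. Bookkeeping such as
    // the set of mutated tensor ids lives in shared_ptr members
    // (m_tensor_map, m_mutated_tensors), so const methods like
    // mutate_input() and add_tensor_to_context() can still record state.
    OutputVector translate_relu6(const NodeContext& context);

    // Registration alias, renamed from PytorchCreatorFunction:
    using CreatorFunction = std::function<OutputVector(const NodeContext&)>;
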
 .../openvino/frontend/pytorch/frontend.hpp | 2 +-
 .../frontend/pytorch/node_context.hpp | 34 ++++++-------
 src/frontends/pytorch/src/node_context.cpp | 10 ++--
 .../pytorch/src/op/adaptive_avg_pool3d.cpp | 2 +-
 .../pytorch/src/op/adaptive_max_pool2d.cpp | 2 +-
 src/frontends/pytorch/src/op/add.cpp | 2 +-
 src/frontends/pytorch/src/op/addcmul.cpp | 2 +-
 src/frontends/pytorch/src/op/addmm.cpp | 2 +-
 src/frontends/pytorch/src/op/arange.cpp | 2 +-
 src/frontends/pytorch/src/op/as_tensor.cpp | 2 +-
 src/frontends/pytorch/src/op/avg_poolnd.cpp | 2 +-
 src/frontends/pytorch/src/op/batch_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/bitwise_not.cpp | 2 +-
 src/frontends/pytorch/src/op/bool.cpp | 2 +-
 src/frontends/pytorch/src/op/cat.cpp | 2 +-
 src/frontends/pytorch/src/op/clamp.cpp | 2 +-
 src/frontends/pytorch/src/op/constant.cpp | 2 +-
 .../pytorch/src/op/conv_transposend.cpp | 2 +-
 src/frontends/pytorch/src/op/convnd.cpp | 2 +-
 src/frontends/pytorch/src/op/convolution.cpp | 2 +-
 .../pytorch/src/op/convolution_mode.cpp | 2 +-
 src/frontends/pytorch/src/op/cumsum.cpp | 2 +-
 src/frontends/pytorch/src/op/dim.cpp | 2 +-
 src/frontends/pytorch/src/op/div.cpp | 2 +-
 src/frontends/pytorch/src/op/elu.cpp | 2 +-
 src/frontends/pytorch/src/op/embedding.cpp | 2 +-
 src/frontends/pytorch/src/op/expand.cpp | 4 +-
 src/frontends/pytorch/src/op/eye.cpp | 2 +-
 src/frontends/pytorch/src/op/flatten.cpp | 2 +-
 src/frontends/pytorch/src/op/floor_divide.cpp | 2 +-
 src/frontends/pytorch/src/op/floordiv.cpp | 2 +-
 src/frontends/pytorch/src/op/full.cpp | 22 ++++-----
 src/frontends/pytorch/src/op/gelu.cpp | 2 +-
 src/frontends/pytorch/src/op/get_attr.cpp | 2 +-
 src/frontends/pytorch/src/op/getitem.cpp | 2 +-
 src/frontends/pytorch/src/op/glu.cpp | 2 +-
 src/frontends/pytorch/src/op/grid_sampler.cpp | 2 +-
 src/frontends/pytorch/src/op/group_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/hardtanh.cpp | 2 +-
 src/frontends/pytorch/src/op/if.cpp | 2 +-
 src/frontends/pytorch/src/op/im2col.cpp | 2 +-
 src/frontends/pytorch/src/op/index_put_.cpp | 4 +-
 .../pytorch/src/op/instance_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/int.cpp | 2 +-
 src/frontends/pytorch/src/op/layer_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/len.cpp | 2 +-
 src/frontends/pytorch/src/op/linear.cpp | 2 +-
 .../pytorch/src/op/list_construct.cpp | 2 +-
 src/frontends/pytorch/src/op/log.cpp | 4 +-
 src/frontends/pytorch/src/op/loop.cpp | 2 +-
 src/frontends/pytorch/src/op/masked_fill.cpp | 2 +-
 src/frontends/pytorch/src/op/max_poolnd.cpp | 2 +-
 src/frontends/pytorch/src/op/mean.cpp | 2 +-
 src/frontends/pytorch/src/op/meshgrid.cpp | 2 +-
 src/frontends/pytorch/src/op/min_max.cpp | 4 +-
 src/frontends/pytorch/src/op/narrow.cpp | 2 +-
 src/frontends/pytorch/src/op/neg.cpp | 2 +-
 src/frontends/pytorch/src/op/nms.cpp | 2 +-
 src/frontends/pytorch/src/op/nonzero.cpp | 2 +-
 src/frontends/pytorch/src/op/norm.cpp | 2 +-
 src/frontends/pytorch/src/op/numel.cpp | 2 +-
 src/frontends/pytorch/src/op/pad.cpp | 2 +-
 src/frontends/pytorch/src/op/pow.cpp | 2 +-
 src/frontends/pytorch/src/op/pythonop.cpp | 2 +-
 src/frontends/pytorch/src/op/reciprocal.cpp | 2 +-
 src/frontends/pytorch/src/op/relu6.cpp | 2 +-
 src/frontends/pytorch/src/op/remainder.cpp | 2 +-
 src/frontends/pytorch/src/op/repeat.cpp | 2 +-
 .../pytorch/src/op/repeat_interleave.cpp | 2 +-
 src/frontends/pytorch/src/op/reshape.cpp | 2 +-
 src/frontends/pytorch/src/op/reshape_as.cpp | 2 +-
 src/frontends/pytorch/src/op/roi_align.cpp | 2 +-
 src/frontends/pytorch/src/op/roll.cpp | 2 +-
 src/frontends/pytorch/src/op/rsqrt.cpp | 2 +-
 src/frontends/pytorch/src/op/rsub.cpp | 2 +-
 src/frontends/pytorch/src/op/select.cpp | 2 +-
 src/frontends/pytorch/src/op/selu.cpp | 2 +-
 src/frontends/pytorch/src/op/set_item.cpp | 2 +-
 src/frontends/pytorch/src/op/size.cpp | 2 +-
 src/frontends/pytorch/src/op/slice.cpp | 2 +-
 src/frontends/pytorch/src/op/softmax.cpp | 2 +-
 src/frontends/pytorch/src/op/sort.cpp | 4 +-
 src/frontends/pytorch/src/op/square.cpp | 2 +-
 src/frontends/pytorch/src/op/squeeze.cpp | 2 +-
 src/frontends/pytorch/src/op/sub.cpp | 2 +-
 src/frontends/pytorch/src/op/sum.cpp | 2 +-
 src/frontends/pytorch/src/op/to.cpp | 2 +-
 src/frontends/pytorch/src/op/topk.cpp | 2 +-
 src/frontends/pytorch/src/op/transpose.cpp | 2 +-
 src/frontends/pytorch/src/op/trilu.cpp | 4 +-
 src/frontends/pytorch/src/op/unfold.cpp | 2 +-
 src/frontends/pytorch/src/op/upsample.cpp | 14 +++---
 src/frontends/pytorch/src/op/var_mean.cpp | 4 +-
 src/frontends/pytorch/src/op/where.cpp | 2 +-
 src/frontends/pytorch/src/op_table.cpp | 4 +-
 src/frontends/pytorch/src/op_table.hpp | 2 +-
 .../pytorch/src/translate_session.cpp | 49 +++++++++----------
 .../pytorch/src/translate_session.hpp | 6 +--
 src/frontends/pytorch/src/utils.cpp | 2 +-
 src/frontends/pytorch/src/utils.hpp | 16 +++---
 100 files changed, 173 insertions(+), 180 deletions(-)

diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp
index 123f24fd4a16a1..9bd62ada8ff706 100644
--- a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp
+++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp
@@ -60,7 +60,7 @@ class PYTORCH_API FrontEnd : public ov::frontend::FrontEnd {
     bool supported_impl(const std::vector& variants) const override;
     ov::frontend::InputModel::Ptr load_impl(const std::vector& variants) const override;
 
-    std::map m_op_translators;
+    std::map m_op_translators;
 };
 
 } // namespace pytorch
diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp
index 41205130137c09..a3c5504c5c0f3a 100644
--- a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp
+++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp
@@ -19,20 +19,22 @@ typedef std::unordered_map> TensorMap;
 
 class NodeContext : public frontend::NodeContext {
 public:
     NodeContext(std::shared_ptr decoder,
-                TensorMap* tensor_map,
-                ParameterVector* external_parameters,
                 const TensorMap& ext_tensor_map,
+                std::shared_ptr tensor_map,
+                std::shared_ptr external_parameters,
+                std::shared_ptr> mutated_tensors,
                 TranslateSession* translate_session)
         : frontend::NodeContext(decoder->get_op_type()),
           m_decoder(decoder),
-          m_tensor_map(tensor_map),
           m_ext_tensor_map(ext_tensor_map),
+          m_tensor_map(tensor_map),
           m_external_parameters(external_parameters),
+          m_mutated_tensors(mutated_tensors),
           m_translate_session(translate_session),
          m_decoder_inputs(decoder->inputs()),
           m_decoder_outputs(decoder->outputs()) {
-        FRONT_END_GENERAL_CHECK(tensor_map != nullptr && external_parameters != nullptr &&
-                                translate_session != nullptr);
+        FRONT_END_GENERAL_CHECK(m_tensor_map != nullptr && m_external_parameters != nullptr &&
+                                m_mutated_tensors != nullptr && m_translate_session != nullptr);
     }
 
     // Do not search for input in tensor map; try to access it as a constant of specified type T and return its value
@@ -106,11 +108,7 @@ class NodeContext : public frontend::NodeContext {
             "There is no any named attributes in PyTorch node, query by attribute name is not implemented");
     }
 
-    void mutate_input(size_t index, Output ov_output);
-
-    std::set get_mutated_tensors() const {
-        return m_mutated_tensors;
-    }
+    void mutate_input(size_t index, Output ov_output) const;
 
     std::shared_ptr get_decoder() const {
         return m_decoder;
@@ -120,7 +118,7 @@
         return m_translate_session;
     }
 
-    void add_tensor_to_context(size_t index, Output ov_output);
+    void add_tensor_to_context(size_t index, Output ov_output) const;
 
     Output get_tensor_from_model(size_t index) const {
         if (m_tensor_map->find(index) != m_tensor_map->end()) {
@@ -130,22 +128,22 @@
         }
     }
 
-    Output get_tensor_from_model_or_create_input(size_t index);
+    Output get_tensor_from_model_or_create_input(size_t index) const;
 
     Output get_input_from_visible_context(size_t index) const;
 
-    std::shared_ptr convert_subgraph(size_t index);
+    std::shared_ptr convert_subgraph(size_t index) const;
 
 private:
     std::shared_ptr m_decoder;
-    std::set m_mutated_tensors;
-    TensorMap* m_tensor_map;
     const TensorMap& m_ext_tensor_map;
-    ParameterVector* m_external_parameters;
-    TranslateSession* m_translate_session;
+    std::shared_ptr m_tensor_map;
+    std::shared_ptr m_external_parameters;
+    std::shared_ptr> m_mutated_tensors;
+    TranslateSession* m_translate_session = nullptr;
     const std::vector m_decoder_inputs;
     const std::vector m_decoder_outputs;
 };
 
-using PytorchCreatorFunction = std::function;
+using CreatorFunction = std::function;
 
 } // namespace pytorch
 } // namespace frontend
diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp
index 10ac4fc5d005e9..49495749d570f4 100644
--- a/src/frontends/pytorch/src/node_context.cpp
+++ b/src/frontends/pytorch/src/node_context.cpp
@@ -42,16 +42,16 @@ std::shared_ptr NodeContext::mark_node(std::shared_ptr ov_node) cons
     return m_decoder->mark_node(ov_node);
 }
 
-void NodeContext::mutate_input(size_t index, Output ov_output) {
+void NodeContext::mutate_input(size_t index, Output ov_output) const {
     FRONT_END_GENERAL_CHECK(!m_decoder->input_is_none(index), "Input is none with index: ", index);
     auto input_id = m_decoder_inputs.at(index);
     FRONT_END_GENERAL_CHECK(m_tensor_map->count(input_id), "No tensor corresponding input: ", input_id, " exist.");
     m_translate_session->encode_tensor_name(ov_output, input_id, m_decoder->get_input_debug_name(index));
     (*m_tensor_map)[input_id] = ov_output;
-    m_mutated_tensors.insert(input_id);
+    m_mutated_tensors->insert(input_id);
 }
 
-void NodeContext::add_tensor_to_context(size_t index, Output ov_output) {
+void NodeContext::add_tensor_to_context(size_t index, Output ov_output) const {
     if (m_tensor_map->count(index)) {
         OPENVINO_DEBUG << "[ WARNING ] Current context has tensor. Rewriting.\n";
     }
@@ -59,7 +59,7 @@ void NodeContext::add_tensor_to_context(size_t index, Output ov_output) {
     (*m_tensor_map)[index] = ov_output;
 }
 
-Output NodeContext::get_tensor_from_model_or_create_input(size_t index) {
+Output NodeContext::get_tensor_from_model_or_create_input(size_t index) const {
     if (m_tensor_map->find(index) != m_tensor_map->end()) {
         return m_tensor_map->at(index);
     } else {
@@ -87,7 +87,7 @@ Output NodeContext::get_input_from_visible_context(size_t index) const {
     return input_tensor;
 }
 
-std::shared_ptr NodeContext::convert_subgraph(size_t index) {
+std::shared_ptr NodeContext::convert_subgraph(size_t index) const {
     auto subgraph_decoder = m_decoder->get_subgraph_decoder(index);
 
     // Extend external context with internal tensors except Parameter nodes, because internal Parameters are created to
diff --git a/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp b/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp
index 75d000ce1d7da8..42aa3da1d8586b 100644
--- a/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp
+++ b/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp
@@ -19,7 +19,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_adaptive_avg_pool3d(NodeContext& context) {
+OutputVector translate_adaptive_avg_pool3d(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto const_tile_params = context.mark_node(v0::Constant::create(element::i32, Shape{5}, {1, 1, 1, 1, 1}));
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
diff --git a/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp b/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp
index bbd72927fc6fac..5705fd22e70f47 100644
--- a/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp
+++ b/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_adaptive_max_pool2d(NodeContext& context) {
+OutputVector translate_adaptive_max_pool2d(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/add.cpp b/src/frontends/pytorch/src/op/add.cpp
index 3c4976efe2cdef..65ecfe021c5b7e 100644
--- a/src/frontends/pytorch/src/op/add.cpp
+++ b/src/frontends/pytorch/src/op/add.cpp
@@ -15,7 +15,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_add(NodeContext& context) {
+OutputVector translate_add(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto lhs = context.get_input(0);
     auto rhs = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/addcmul.cpp b/src/frontends/pytorch/src/op/addcmul.cpp
index 50d2cec4a3d045..7933acfde1d176 100644
--- a/src/frontends/pytorch/src/op/addcmul.cpp
+++ b/src/frontends/pytorch/src/op/addcmul.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_addcmul(NodeContext& context) {
+OutputVector translate_addcmul(const NodeContext& context) {
     num_inputs_check(context, 4, 4);
     const auto eltwise_mult = std::make_shared(context.get_input(1), context.get_input(2));
     const auto value = context.get_input(3);
diff --git a/src/frontends/pytorch/src/op/addmm.cpp b/src/frontends/pytorch/src/op/addmm.cpp
index 4a2e16906b2b09..e8ba9b7e9159ab 100644
--- a/src/frontends/pytorch/src/op/addmm.cpp
+++ b/src/frontends/pytorch/src/op/addmm.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_addmm(NodeContext& context) {
+OutputVector translate_addmm(const NodeContext& context) {
     num_inputs_check(context, 5, 5);
     auto input = context.get_input(0);
     auto m1 = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/arange.cpp b/src/frontends/pytorch/src/op/arange.cpp
index bacd8d113bc3c3..c238cd12bbc394 100644
--- a/src/frontends/pytorch/src/op/arange.cpp
+++ b/src/frontends/pytorch/src/op/arange.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_arange(NodeContext& context) {
+OutputVector translate_arange(const NodeContext& context) {
     auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
     auto one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
     int dtype_port = -1;
diff --git a/src/frontends/pytorch/src/op/as_tensor.cpp b/src/frontends/pytorch/src/op/as_tensor.cpp
index 114f4e6fe6ad34..ae2c15d0a1eba9 100644
--- a/src/frontends/pytorch/src/op/as_tensor.cpp
+++ b/src/frontends/pytorch/src/op/as_tensor.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_as_tensor(NodeContext& context) {
+OutputVector translate_as_tensor(const NodeContext& context) {
     // aten::tensor(t[] data, *, ScalarType? dtype=None, Device? device=None, bool requires_grad=False) -> Tensor
     num_inputs_check(context, 1, 4);
     auto dtype = element::f32;
diff --git a/src/frontends/pytorch/src/op/avg_poolnd.cpp b/src/frontends/pytorch/src/op/avg_poolnd.cpp
index bb1d16b99df2cb..77f35a0569e76b 100644
--- a/src/frontends/pytorch/src/op/avg_poolnd.cpp
+++ b/src/frontends/pytorch/src/op/avg_poolnd.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_avg_poolnd(NodeContext& context) {
+OutputVector translate_avg_poolnd(const NodeContext& context) {
     num_inputs_check(context, 6, 7);
     auto input = context.get_input(0);
     auto kernel = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/batch_norm.cpp b/src/frontends/pytorch/src/op/batch_norm.cpp
index 495295e212e213..a306dd21832e82 100644
--- a/src/frontends/pytorch/src/op/batch_norm.cpp
+++ b/src/frontends/pytorch/src/op/batch_norm.cpp
@@ -32,7 +32,7 @@ Output broadcast_const_to_channel_dim(const NodeContext& context,
 }
 } // namespace
 
-OutputVector translate_batch_norm(NodeContext& context) {
+OutputVector translate_batch_norm(const NodeContext& context) {
     // Schema: aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var,
     // bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
     num_inputs_check(context, 8, 9);
diff --git a/src/frontends/pytorch/src/op/bitwise_not.cpp b/src/frontends/pytorch/src/op/bitwise_not.cpp
index dcb953990e0d82..55ba1203b80a04 100644
--- a/src/frontends/pytorch/src/op/bitwise_not.cpp
+++ b/src/frontends/pytorch/src/op/bitwise_not.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_bitwise_not(NodeContext& context) {
+OutputVector translate_bitwise_not(const NodeContext& context) {
     num_inputs_check(context, 1, 2);
     auto x = context.get_input(0);
     FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean),
diff --git a/src/frontends/pytorch/src/op/bool.cpp b/src/frontends/pytorch/src/op/bool.cpp
index 60d7a4e0158631..0d0d53ad608fb1 100644
--- a/src/frontends/pytorch/src/op/bool.cpp
+++ b/src/frontends/pytorch/src/op/bool.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_bool(NodeContext& context) {
+OutputVector translate_bool(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     return {context.mark_node(std::make_shared(context.get_input(0), element::boolean))};
 };
diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp
index ce359c6384885f..8fbdd0f3e6f32f 100644
--- a/src/frontends/pytorch/src/op/cat.cpp
+++ b/src/frontends/pytorch/src/op/cat.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_cat(NodeContext& context) {
+OutputVector translate_cat(const NodeContext& context) {
     // This translator is only needed to get axis as constant from external scope
     num_inputs_check(context, 2, 2);
     const auto&& list_elems = get_list_as_outputs(context.get_input(0));
diff --git a/src/frontends/pytorch/src/op/clamp.cpp b/src/frontends/pytorch/src/op/clamp.cpp
index afbe349cf4a25f..fa28ca301df48a 100644
--- a/src/frontends/pytorch/src/op/clamp.cpp
+++ b/src/frontends/pytorch/src/op/clamp.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_clamp(NodeContext& context) {
+OutputVector translate_clamp(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     auto x = context.get_input(0);
     if (!context.input_is_none(1)) {
diff --git a/src/frontends/pytorch/src/op/constant.cpp b/src/frontends/pytorch/src/op/constant.cpp
index 944a1e197867dc..6fc6444f04fa35 100644
--- a/src/frontends/pytorch/src/op/constant.cpp
+++ b/src/frontends/pytorch/src/op/constant.cpp
@@ -9,7 +9,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_constant(NodeContext& context) {
+OutputVector translate_constant(const NodeContext& context) {
     return context.as_constant();
 };
 
diff --git a/src/frontends/pytorch/src/op/conv_transposend.cpp b/src/frontends/pytorch/src/op/conv_transposend.cpp
index d4a0cc2e047114..1f281f90486fad 100644
--- a/src/frontends/pytorch/src/op/conv_transposend.cpp
+++ b/src/frontends/pytorch/src/op/conv_transposend.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_conv_transposend(NodeContext& context) {
+OutputVector translate_conv_transposend(const NodeContext& context) {
     num_inputs_check(context, 8, 8);
     auto strides = context.const_input(3);
     // PyTorch support only symmetric padding, padding sizes are the same for begins and ends for each dimension
diff --git a/src/frontends/pytorch/src/op/convnd.cpp b/src/frontends/pytorch/src/op/convnd.cpp
index 9b91985182479a..861cb68f1fa30f 100644
--- a/src/frontends/pytorch/src/op/convnd.cpp
+++ b/src/frontends/pytorch/src/op/convnd.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_convnd(NodeContext& context) {
+OutputVector translate_convnd(const NodeContext& context) {
     num_inputs_check(context, 7, 7);
     auto strides = context.const_input(3);
     // In torch pads at beginning are same as at end
diff --git a/src/frontends/pytorch/src/op/convolution.cpp b/src/frontends/pytorch/src/op/convolution.cpp
index d0e77e4adadafe..7a250115b54a51 100644
--- a/src/frontends/pytorch/src/op/convolution.cpp
+++ b/src/frontends/pytorch/src/op/convolution.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_convolution(NodeContext& context) {
+OutputVector translate_convolution(const NodeContext& context) {
     // Schema: aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[]
     // dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool
     // cudnn_enabled, bool allow_tf32) -> Tensor
diff --git a/src/frontends/pytorch/src/op/convolution_mode.cpp b/src/frontends/pytorch/src/op/convolution_mode.cpp
index c732aa6688941f..7aa9d8f991b97b 100644
--- a/src/frontends/pytorch/src/op/convolution_mode.cpp
+++ b/src/frontends/pytorch/src/op/convolution_mode.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_convolution_mode(NodeContext& context) {
+OutputVector translate_convolution_mode(const NodeContext& context) {
     // Schema: aten::_convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[]
     // dilation, int groups) -> Tensor
     num_inputs_check(context, 7, 7);
diff --git a/src/frontends/pytorch/src/op/cumsum.cpp b/src/frontends/pytorch/src/op/cumsum.cpp
index 55d46500427e6d..c396521a9e402b 100644
--- a/src/frontends/pytorch/src/op/cumsum.cpp
+++ b/src/frontends/pytorch/src/op/cumsum.cpp
@@ -13,7 +13,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_cumsum(NodeContext& context) {
+OutputVector translate_cumsum(const NodeContext& context) {
     // aten::cumsum(Tensor self, int dim, *, ScalarType? dtype=None, Tensor out=None)
     num_inputs_check(context, 2, 4);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/dim.cpp b/src/frontends/pytorch/src/op/dim.cpp
index 7af4aa8fe3147e..2d69cb3e37a796 100644
--- a/src/frontends/pytorch/src/op/dim.cpp
+++ b/src/frontends/pytorch/src/op/dim.cpp
@@ -12,7 +12,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_dim(NodeContext& context) {
+OutputVector translate_dim(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     Output rank;
     std::tie(std::ignore, rank) = get_shape_rank(context, context.get_input(0), true);
diff --git a/src/frontends/pytorch/src/op/div.cpp b/src/frontends/pytorch/src/op/div.cpp
index 54cab6325ae7f2..e9dd7136e35bef 100644
--- a/src/frontends/pytorch/src/op/div.cpp
+++ b/src/frontends/pytorch/src/op/div.cpp
@@ -17,7 +17,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_div(NodeContext& context) {
+OutputVector translate_div(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/elu.cpp b/src/frontends/pytorch/src/op/elu.cpp
index f60d76b96638e5..4f96371ee83ebd 100644
--- a/src/frontends/pytorch/src/op/elu.cpp
+++ b/src/frontends/pytorch/src/op/elu.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_elu(NodeContext& context) {
+OutputVector translate_elu(const NodeContext& context) {
     // aten::elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
     num_inputs_check(context, 2, 4);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/embedding.cpp b/src/frontends/pytorch/src/op/embedding.cpp
index c920992bdaafec..e5dc85a0ddfebf 100644
--- a/src/frontends/pytorch/src/op/embedding.cpp
+++ b/src/frontends/pytorch/src/op/embedding.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_embedding(NodeContext& context) {
+OutputVector translate_embedding(const NodeContext& context) {
     // aten::embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool
     // sparse=False)
     num_inputs_check(context, 5, 5);
diff --git a/src/frontends/pytorch/src/op/expand.cpp b/src/frontends/pytorch/src/op/expand.cpp
index 34f0a9d70c323b..9210cedc6eba6f 100644
--- a/src/frontends/pytorch/src/op/expand.cpp
+++ b/src/frontends/pytorch/src/op/expand.cpp
@@ -30,7 +30,7 @@ OutputVector base_expand(const NodeContext& context, const Output& x, cons
 };
 } // namespace
 
-OutputVector translate_expand(NodeContext& context) {
+OutputVector translate_expand(const NodeContext& context) {
     // aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
@@ -41,7 +41,7 @@ OutputVector translate_expand(NodeContext& context) {
     return base_expand(context, x, sizes);
 };
 
-OutputVector translate_expand_as(NodeContext& context) {
+OutputVector translate_expand_as(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/eye.cpp b/src/frontends/pytorch/src/op/eye.cpp
index ab35c56569c5c4..9b7f7ef8c3bc29 100644
--- a/src/frontends/pytorch/src/op/eye.cpp
+++ b/src/frontends/pytorch/src/op/eye.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_eye(NodeContext& context) {
+OutputVector translate_eye(const NodeContext& context) {
     size_t num_inputs = context.get_input_size();
     auto x = context.get_input(0);
     // num rows and cols should be integer, but at the moment conversion their data type can be unknown yet
diff --git a/src/frontends/pytorch/src/op/flatten.cpp b/src/frontends/pytorch/src/op/flatten.cpp
index 6d9005a64b8643..6022661c3aa8cf 100644
--- a/src/frontends/pytorch/src/op/flatten.cpp
+++ b/src/frontends/pytorch/src/op/flatten.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_flatten(NodeContext& context) {
+OutputVector translate_flatten(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     auto x = context.get_input(0);
     int64_t start_dim = 0;
diff --git a/src/frontends/pytorch/src/op/floor_divide.cpp b/src/frontends/pytorch/src/op/floor_divide.cpp
index 5731006dd770aa..4fb1b230d44c21 100644
--- a/src/frontends/pytorch/src/op/floor_divide.cpp
+++ b/src/frontends/pytorch/src/op/floor_divide.cpp
@@ -14,7 +14,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_floor_divide(NodeContext& context) {
+OutputVector translate_floor_divide(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/floordiv.cpp b/src/frontends/pytorch/src/op/floordiv.cpp
index b85cacf3fc5145..91c03e74d7f0b1 100644
--- a/src/frontends/pytorch/src/op/floordiv.cpp
+++ b/src/frontends/pytorch/src/op/floordiv.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_floordiv(NodeContext& context) {
+OutputVector translate_floordiv(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/full.cpp b/src/frontends/pytorch/src/op/full.cpp
index abfacbf872f913..bbb7f98022f42b 100644
--- a/src/frontends/pytorch/src/op/full.cpp
+++ b/src/frontends/pytorch/src/op/full.cpp
@@ -42,7 +42,7 @@ Output base_translate_full_with_convert(const NodeContext& context,
 }
 } // namespace
 
-OutputVector translate_full(NodeContext& context) {
+OutputVector translate_full(const NodeContext& context) {
     num_inputs_check(context, 2, 6);
     auto sizes = context.get_input(0);
     auto value = context.get_input(1);
@@ -59,7 +59,7 @@ OutputVector translate_full(NodeContext& context) {
     return {base_translate_full_with_convert(context, sizes, value, dtype_id)};
 };
 
-OutputVector translate_full_like(NodeContext& context) {
+OutputVector translate_full_like(const NodeContext& context) {
     num_inputs_check(context, 2, 7);
     auto input = context.get_input(0);
     auto value = context.get_input(1);
@@ -71,7 +71,7 @@
     return {base_translate_full_with_convertlike(context, sizes, value, out)};
 };
 
-OutputVector translate_fill_(NodeContext& context) {
+OutputVector translate_fill_(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto input = context.get_input(0);
     auto value = context.get_input(1);
@@ -79,7 +79,7 @@ OutputVector translate_fill_(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_new_full(NodeContext& context) {
+OutputVector translate_new_full(const NodeContext& context) {
     num_inputs_check(context, 3, 7);
     auto input = context.get_input(0);
     auto sizes = context.get_input(1);
@@ -90,7 +90,7 @@ OutputVector translate_new_full(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_zeros(NodeContext& context) {
+OutputVector translate_zeros(const NodeContext& context) {
     num_inputs_check(context, 2, 5);
     auto sizes = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
@@ -107,7 +107,7 @@ OutputVector translate_zeros(NodeContext& context) {
     return {base_translate_full_with_convert(context, sizes, value, dtype_id)};
 };
 
-OutputVector translate_zeros_like(NodeContext& context) {
+OutputVector translate_zeros_like(const NodeContext& context) {
     num_inputs_check(context, 1, 6);
     auto input = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
@@ -119,7 +119,7 @@ OutputVector translate_zeros_like(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, out)};
 };
 
-OutputVector translate_new_zeros(NodeContext& context) {
+OutputVector translate_new_zeros(const NodeContext& context) {
     num_inputs_check(context, 2, 6);
     auto input = context.get_input(0);
     auto sizes = context.get_input(1);
@@ -130,7 +130,7 @@ OutputVector translate_new_zeros(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_ones(NodeContext& context) {
+OutputVector translate_ones(const NodeContext& context) {
     num_inputs_check(context, 1, 5);
     auto sizes = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
@@ -147,7 +147,7 @@ OutputVector translate_ones(NodeContext& context) {
     return {base_translate_full_with_convert(context, sizes, value, dtype_id)};
 };
 
-OutputVector translate_ones_like(NodeContext& context) {
+OutputVector translate_ones_like(const NodeContext& context) {
     num_inputs_check(context, 1, 6);
     auto input = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
@@ -159,7 +159,7 @@ OutputVector translate_ones_like(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, out)};
 };
 
-OutputVector translate_new_ones(NodeContext& context) {
+OutputVector translate_new_ones(const NodeContext& context) {
     num_inputs_check(context, 2, 6);
     auto input = context.get_input(0);
     auto sizes = context.get_input(1);
@@ -170,7 +170,7 @@ OutputVector translate_new_ones(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_empty(NodeContext& context) {
+OutputVector translate_empty(const NodeContext& context) {
     // aten::empty(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool?
     // pin_memory=None, MemoryFormat? memory_format=None) -> Tensor layout, device and work with memory ignored on our
     // side, so just skip these parameters
diff --git a/src/frontends/pytorch/src/op/gelu.cpp b/src/frontends/pytorch/src/op/gelu.cpp
index 598f6865b3e2b0..c64ea647688584 100644
--- a/src/frontends/pytorch/src/op/gelu.cpp
+++ b/src/frontends/pytorch/src/op/gelu.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_gelu(NodeContext& context) {
+OutputVector translate_gelu(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto approximate = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/get_attr.cpp b/src/frontends/pytorch/src/op/get_attr.cpp
index 3575a5210a8518..1d0ae0e4d13d1b 100644
--- a/src/frontends/pytorch/src/op/get_attr.cpp
+++ b/src/frontends/pytorch/src/op/get_attr.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_get_attr(NodeContext& context) {
+OutputVector translate_get_attr(const NodeContext& context) {
     auto res = context.get_decoder()->try_decode_get_attr();
     FRONT_END_OP_CONVERSION_CHECK(res.size() > 0, "GetAttr must have at least one output.");
     return res;
diff --git a/src/frontends/pytorch/src/op/getitem.cpp b/src/frontends/pytorch/src/op/getitem.cpp
index 1bf9f4a0e8a274..0a1243196f4d6c 100644
--- a/src/frontends/pytorch/src/op/getitem.cpp
+++ b/src/frontends/pytorch/src/op/getitem.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_getitem(NodeContext& context) {
+OutputVector translate_getitem(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto input = context.get_input(0);
     if (std::dynamic_pointer_cast(input.get_node_shared_ptr())) {
diff --git a/src/frontends/pytorch/src/op/glu.cpp b/src/frontends/pytorch/src/op/glu.cpp
index e650e9c4a54c05..dbe979fb1f2870 100644
--- a/src/frontends/pytorch/src/op/glu.cpp
+++ b/src/frontends/pytorch/src/op/glu.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_glu(NodeContext& context) {
+OutputVector translate_glu(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto dim = context.input_is_none(1) ? context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}))
diff --git a/src/frontends/pytorch/src/op/grid_sampler.cpp b/src/frontends/pytorch/src/op/grid_sampler.cpp
index 9011abd8566a67..8c603813d888f7 100644
--- a/src/frontends/pytorch/src/op/grid_sampler.cpp
+++ b/src/frontends/pytorch/src/op/grid_sampler.cpp
@@ -13,7 +13,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_grid_sampler(NodeContext& context) {
+OutputVector translate_grid_sampler(const NodeContext& context) {
     num_inputs_check(context, 4, 5);
     auto x = context.get_input(0);
     auto grid = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/group_norm.cpp b/src/frontends/pytorch/src/op/group_norm.cpp
index 7b3ac53bbb4772..6ce36aac6601b1 100644
--- a/src/frontends/pytorch/src/op/group_norm.cpp
+++ b/src/frontends/pytorch/src/op/group_norm.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_group_norm(NodeContext& context) {
+OutputVector translate_group_norm(const NodeContext& context) {
     // aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float
     // eps=1.0000000000000001e-05, bool cudnn_enabled=True) -> Tensor
     num_inputs_check(context, 2, 6);
diff --git a/src/frontends/pytorch/src/op/hardtanh.cpp b/src/frontends/pytorch/src/op/hardtanh.cpp
index 52551a259a97d2..a85bedbf00626b 100644
--- a/src/frontends/pytorch/src/op/hardtanh.cpp
+++ b/src/frontends/pytorch/src/op/hardtanh.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_hardtanh(NodeContext& context) {
+OutputVector translate_hardtanh(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     float min = -1;
     float max = 1;
diff --git a/src/frontends/pytorch/src/op/if.cpp b/src/frontends/pytorch/src/op/if.cpp
index 1e5d3a26778cc1..7fb3ecce123a26 100644
--- a/src/frontends/pytorch/src/op/if.cpp
+++ b/src/frontends/pytorch/src/op/if.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_if(NodeContext& context) {
+OutputVector translate_if(const NodeContext& context) {
     auto if_node = std::make_shared(context.get_input(0));
     context.mark_node(if_node);
     auto decoder = context.get_decoder();
diff --git a/src/frontends/pytorch/src/op/im2col.cpp b/src/frontends/pytorch/src/op/im2col.cpp
index 12fb4f3b7c4a04..718e0eadaa4ca0 100644
--- a/src/frontends/pytorch/src/op/im2col.cpp
+++ b/src/frontends/pytorch/src/op/im2col.cpp
@@ -56,7 +56,7 @@ std::shared_ptr get_im2col_indices_along_dim(const NodeContext& context,
 }
 } // namespace
 
-OutputVector translate_im2col(NodeContext& context) {
+OutputVector translate_im2col(const NodeContext& context) {
     num_inputs_check(context, 5, 5);
     auto input = context.get_input(0);
     auto kernel_size = context.const_input>(1);
diff --git a/src/frontends/pytorch/src/op/index_put_.cpp b/src/frontends/pytorch/src/op/index_put_.cpp
index 1ce4ea0e96d6a1..d8a599eaf5abcf 100644
--- a/src/frontends/pytorch/src/op/index_put_.cpp
+++ b/src/frontends/pytorch/src/op/index_put_.cpp
@@ -10,9 +10,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-using namespace ov::op;
-
-OutputVector translate_index_put_(NodeContext& context) {
+OutputVector translate_index_put_(const NodeContext& context) {
     // Pass as PtFrameworkNode to register as `inplace_op`. Conversion to OV operators is done as transformation.
     auto node = std::make_shared(context.get_decoder(), context.inputs());
     return {context.mark_node(node)};
diff --git a/src/frontends/pytorch/src/op/instance_norm.cpp b/src/frontends/pytorch/src/op/instance_norm.cpp
index b00b7bff260bd1..ff53bd11e92ffb 100644
--- a/src/frontends/pytorch/src/op/instance_norm.cpp
+++ b/src/frontends/pytorch/src/op/instance_norm.cpp
@@ -88,7 +88,7 @@ OutputVector translate_instance_norm_train(const NodeContext& context,
 
 } // namespace
 
-OutputVector translate_instance_norm(NodeContext& context) {
+OutputVector translate_instance_norm(const NodeContext& context) {
     num_inputs_check(context, 8, 9);
     auto input = context.get_input(0);
     auto eps = context.const_input(7);
diff --git a/src/frontends/pytorch/src/op/int.cpp b/src/frontends/pytorch/src/op/int.cpp
index e11397cb6f11e3..5a407a1a7254c3 100644
--- a/src/frontends/pytorch/src/op/int.cpp
+++ b/src/frontends/pytorch/src/op/int.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_int(NodeContext& context) {
+OutputVector translate_int(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     return {context.mark_node(std::make_shared(context.get_input(0), element::i32))};
 };
diff --git a/src/frontends/pytorch/src/op/layer_norm.cpp b/src/frontends/pytorch/src/op/layer_norm.cpp
index c954110111e799..204d7164531c72 100644
--- a/src/frontends/pytorch/src/op/layer_norm.cpp
+++ b/src/frontends/pytorch/src/op/layer_norm.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_layer_norm(NodeContext& context) {
+OutputVector translate_layer_norm(const NodeContext& context) {
     num_inputs_check(context, 5, 6);
     auto eps = context.const_input(4);
     auto normalized_shape = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/len.cpp b/src/frontends/pytorch/src/op/len.cpp
index 71f8bdf3a32b88..9a22658500913f 100644
--- a/src/frontends/pytorch/src/op/len.cpp
+++ b/src/frontends/pytorch/src/op/len.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_len(NodeContext& context) {
+OutputVector translate_len(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
diff --git a/src/frontends/pytorch/src/op/linear.cpp b/src/frontends/pytorch/src/op/linear.cpp
index e94ff7c9168003..8288220f320251 100644
--- a/src/frontends/pytorch/src/op/linear.cpp
+++ b/src/frontends/pytorch/src/op/linear.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_linear(NodeContext& context) {
+OutputVector translate_linear(const NodeContext& context) {
     // schema: aten::linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/list_construct.cpp b/src/frontends/pytorch/src/op/list_construct.cpp
index e69188e23d89a3..e58a3c4744ff61 100644
--- a/src/frontends/pytorch/src/op/list_construct.cpp
+++ b/src/frontends/pytorch/src/op/list_construct.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_list_construct(NodeContext& context) {
+OutputVector translate_list_construct(const NodeContext& context) {
     // Process the case when prim::ListConstruct has all inputs constant
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
     ov::OutputVector consts;
diff --git a/src/frontends/pytorch/src/op/log.cpp b/src/frontends/pytorch/src/op/log.cpp
index 85947b7694ee06..808dff6ed32822 100644
--- a/src/frontends/pytorch/src/op/log.cpp
+++ b/src/frontends/pytorch/src/op/log.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_log(NodeContext& context) {
+OutputVector translate_log(const NodeContext& context) {
     // torch.log returns a tensor with the natural logarithm of the elements of input.
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
@@ -26,7 +26,7 @@ OutputVector translate_log(NodeContext& context) {
     return {log};
 };
 
-OutputVector translate_log2(NodeContext& context) {
+OutputVector translate_log2(const NodeContext& context) {
     // torch.log2 returns a tensor with the logarithm to the base 2 of the elements of input.
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/loop.cpp b/src/frontends/pytorch/src/op/loop.cpp
index 7bf03cfcd30138..36369ea63bd4ee 100644
--- a/src/frontends/pytorch/src/op/loop.cpp
+++ b/src/frontends/pytorch/src/op/loop.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_loop(NodeContext& context) {
+OutputVector translate_loop(const NodeContext& context) {
     const auto& inputs = context.inputs();
     FRONT_END_OP_CONVERSION_CHECK(inputs.size() >= 2, "Loop must have at least 2 inputs.");
     auto loop = std::make_shared(inputs[0], inputs[1]);
diff --git a/src/frontends/pytorch/src/op/masked_fill.cpp b/src/frontends/pytorch/src/op/masked_fill.cpp
index 2a071755b3a145..5ed090e0b619b7 100644
--- a/src/frontends/pytorch/src/op/masked_fill.cpp
+++ b/src/frontends/pytorch/src/op/masked_fill.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_masked_fill(NodeContext& context) {
+OutputVector translate_masked_fill(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto data = context.get_input(0);
     auto mask = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/max_poolnd.cpp b/src/frontends/pytorch/src/op/max_poolnd.cpp
index f594b0a2b0798c..f756b1488ce9ea 100644
--- a/src/frontends/pytorch/src/op/max_poolnd.cpp
+++ b/src/frontends/pytorch/src/op/max_poolnd.cpp
@@ -13,7 +13,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_max_poolnd(NodeContext& context) {
+OutputVector translate_max_poolnd(const NodeContext& context) {
     num_inputs_check(context, 6, 6);
     auto kernel = context.const_input(1);
     auto strides = context.const_input(2);
diff --git a/src/frontends/pytorch/src/op/mean.cpp b/src/frontends/pytorch/src/op/mean.cpp
index b7a5acfb6fb22f..46c42f6be1a4b7 100644
--- a/src/frontends/pytorch/src/op/mean.cpp
+++ b/src/frontends/pytorch/src/op/mean.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_mean(NodeContext& context) {
+OutputVector translate_mean(const NodeContext& context) {
     num_inputs_check(context, 3, 4);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/meshgrid.cpp b/src/frontends/pytorch/src/op/meshgrid.cpp
index 841de80fcbf494..c9b5833ae68d60 100644
--- a/src/frontends/pytorch/src/op/meshgrid.cpp
+++ b/src/frontends/pytorch/src/op/meshgrid.cpp
@@ -10,7 +10,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_meshgrid(NodeContext& context) {
+OutputVector translate_meshgrid(const NodeContext& context) {
     std::string indexing = "ij";
     if (!context.input_is_none(1)) {
         indexing = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/min_max.cpp b/src/frontends/pytorch/src/op/min_max.cpp
index 34c70219f1137c..898403bf82b7cf 100644
--- a/src/frontends/pytorch/src/op/min_max.cpp
+++ b/src/frontends/pytorch/src/op/min_max.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_max(NodeContext& context) {
+OutputVector translate_max(const NodeContext& context) {
     // torch.max (same for torch.min) actually has two interfaces smashed together:
     // torch.max(x, dim, keepdim) and torch.max(x, y)
     num_inputs_check(context, 1, 3);
@@ -49,7 +49,7 @@ OutputVector translate_max(NodeContext& context) {
     return {values, indicies};
 };
 
-OutputVector translate_min(NodeContext& context) {
+OutputVector translate_min(const NodeContext& context) {
     // torch.min (same for torch.max) actually has two interfaces smashed together:
     // torch.min(x, dim, keepdim) and torch.min(x, y)
     num_inputs_check(context, 1, 3);
diff --git a/src/frontends/pytorch/src/op/narrow.cpp b/src/frontends/pytorch/src/op/narrow.cpp
index a212b22503434e..ffae7d2ec55a03 100644
--- a/src/frontends/pytorch/src/op/narrow.cpp
+++ b/src/frontends/pytorch/src/op/narrow.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_narrow(NodeContext& context) {
+OutputVector translate_narrow(const NodeContext& context) {
     num_inputs_check(context, 4, 4);
 
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
diff --git a/src/frontends/pytorch/src/op/neg.cpp b/src/frontends/pytorch/src/op/neg.cpp
index e902eb0f21fefb..423118c3b2fc24 100644
--- a/src/frontends/pytorch/src/op/neg.cpp
+++ b/src/frontends/pytorch/src/op/neg.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_neg(NodeContext& context) {
+OutputVector translate_neg(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}));
diff --git a/src/frontends/pytorch/src/op/nms.cpp b/src/frontends/pytorch/src/op/nms.cpp
index 2454d94a78e6a8..86ecb3df73cf87 100644
--- a/src/frontends/pytorch/src/op/nms.cpp
+++ b/src/frontends/pytorch/src/op/nms.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_nms(NodeContext& context) {
+OutputVector translate_nms(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
diff --git a/src/frontends/pytorch/src/op/nonzero.cpp b/src/frontends/pytorch/src/op/nonzero.cpp
index 80edef3f079b6b..29a6aa51175008 100644
--- a/src/frontends/pytorch/src/op/nonzero.cpp
+++ b/src/frontends/pytorch/src/op/nonzero.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_nonzero(NodeContext& context) {
+OutputVector translate_nonzero(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto cond = context.get_input(0);
     auto non_zero = context.mark_node(std::make_shared(cond));
diff --git a/src/frontends/pytorch/src/op/norm.cpp b/src/frontends/pytorch/src/op/norm.cpp
index d35c0fec25867f..34a0bdd01c4cfa 100644
--- a/src/frontends/pytorch/src/op/norm.cpp
+++ b/src/frontends/pytorch/src/op/norm.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_norm(NodeContext& context) {
+OutputVector translate_norm(const NodeContext& context) {
     num_inputs_check(context, 4, 4);
     auto input_tensor = context.get_input(0);
     auto p = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/numel.cpp b/src/frontends/pytorch/src/op/numel.cpp
index 721ed7e173bdc6..a4d2a836c6a2bd 100644
--- a/src/frontends/pytorch/src/op/numel.cpp
+++ b/src/frontends/pytorch/src/op/numel.cpp
@@ -10,7 +10,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_numel(NodeContext& context) {
+OutputVector translate_numel(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     return {numel(context, context.get_input(0))};
 };
diff --git a/src/frontends/pytorch/src/op/pad.cpp b/src/frontends/pytorch/src/op/pad.cpp
index 8a0568ece9cc3d..170544654542e5 100644
--- a/src/frontends/pytorch/src/op/pad.cpp
+++ b/src/frontends/pytorch/src/op/pad.cpp
@@ -22,7 +22,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_pad(NodeContext& context) {
+OutputVector translate_pad(const NodeContext& context) {
     num_inputs_check(context, 2, 4);
     auto data = context.get_input(0);
     auto paddings = context.const_input>(1);
diff --git a/src/frontends/pytorch/src/op/pow.cpp b/src/frontends/pytorch/src/op/pow.cpp
index d418f3385340bf..d3a39694bf3953 100644
--- a/src/frontends/pytorch/src/op/pow.cpp
+++ b/src/frontends/pytorch/src/op/pow.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_pow(NodeContext& context) {
+OutputVector translate_pow(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto lhs = context.get_input(0);
     auto rhs = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/pythonop.cpp b/src/frontends/pytorch/src/op/pythonop.cpp
index 36a4b388738c02..4aa142f04b58ed 100644
--- a/src/frontends/pytorch/src/op/pythonop.cpp
+++ b/src/frontends/pytorch/src/op/pythonop.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_pythonop(NodeContext& context) {
+OutputVector translate_pythonop(const NodeContext& context) {
     auto decoder = context.get_decoder();
     FRONT_END_OP_CONVERSION_CHECK(decoder->get_subgraph_size() == 1,
                                   "PythonOp must have 1 subgraph to be able to translate it to OV.");
diff --git a/src/frontends/pytorch/src/op/reciprocal.cpp b/src/frontends/pytorch/src/op/reciprocal.cpp
index 67c5bdba78b39f..38b12fee06cb18 100644
--- a/src/frontends/pytorch/src/op/reciprocal.cpp
+++ b/src/frontends/pytorch/src/op/reciprocal.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_reciprocal(NodeContext& context) {
+OutputVector translate_reciprocal(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}));
diff --git a/src/frontends/pytorch/src/op/relu6.cpp b/src/frontends/pytorch/src/op/relu6.cpp
index 5dd5906061bc7f..08996811249dcc 100644
--- a/src/frontends/pytorch/src/op/relu6.cpp
+++ b/src/frontends/pytorch/src/op/relu6.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_relu6(NodeContext& context) {
+OutputVector translate_relu6(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     return {context.mark_node(std::make_shared(x, 0., 6.))};
diff --git a/src/frontends/pytorch/src/op/remainder.cpp b/src/frontends/pytorch/src/op/remainder.cpp
index 55d33e00c3f321..622e20eba52fa4 100644
--- a/src/frontends/pytorch/src/op/remainder.cpp
+++ b/src/frontends/pytorch/src/op/remainder.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_remainder(NodeContext& context) {
+OutputVector translate_remainder(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/repeat.cpp b/src/frontends/pytorch/src/op/repeat.cpp
index 574951aaf82c20..15dc03a466ec92 100644
--- a/src/frontends/pytorch/src/op/repeat.cpp
+++ b/src/frontends/pytorch/src/op/repeat.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_repeat(NodeContext& context) {
+OutputVector translate_repeat(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto repeats = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/repeat_interleave.cpp b/src/frontends/pytorch/src/op/repeat_interleave.cpp
index 06d8333e04cceb..64971f6e3f28bc 100644
--- a/src/frontends/pytorch/src/op/repeat_interleave.cpp
+++ b/src/frontends/pytorch/src/op/repeat_interleave.cpp
@@ -34,7 +34,7 @@ OutputVector generate_indices_from_repeats_tensor(const NodeContext& context, co
 };
 } // namespace
 
-OutputVector translate_repeat_interleave(NodeContext& context) {
+OutputVector translate_repeat_interleave(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     // constants
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
diff --git a/src/frontends/pytorch/src/op/reshape.cpp b/src/frontends/pytorch/src/op/reshape.cpp
index b0d669e47be63b..c5c33f4f6e61da 100644
--- a/src/frontends/pytorch/src/op/reshape.cpp
+++ b/src/frontends/pytorch/src/op/reshape.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_reshape(NodeContext& context) {
+OutputVector translate_reshape(const NodeContext& context) {
     // Translation is used by both aten::view and aten::reshape.
     // Schema: aten::view(Tensor input, int[] shape) -> Tensor
     // Schema: aten::reshape(Tensor input, int[] shape) -> Tensor
diff --git a/src/frontends/pytorch/src/op/reshape_as.cpp b/src/frontends/pytorch/src/op/reshape_as.cpp
index 63d18ee468f087..1c9be43d4a2d15 100644
--- a/src/frontends/pytorch/src/op/reshape_as.cpp
+++ b/src/frontends/pytorch/src/op/reshape_as.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_reshape_as(NodeContext& context) {
+OutputVector translate_reshape_as(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto input_tensor = context.get_input(0);
     auto shape_tesnor = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/roi_align.cpp b/src/frontends/pytorch/src/op/roi_align.cpp
index d3a389c59654b9..fb2ad3a41d7455 100644
--- a/src/frontends/pytorch/src/op/roi_align.cpp
+++ b/src/frontends/pytorch/src/op/roi_align.cpp
@@ -19,7 +19,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_roi_align(NodeContext& context) {
+OutputVector translate_roi_align(const NodeContext& context) {
     num_inputs_check(context, 7, 7);
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
     auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
diff --git a/src/frontends/pytorch/src/op/roll.cpp b/src/frontends/pytorch/src/op/roll.cpp
index b0aef51a6872b9..9f358368fbce8c 100644
--- a/src/frontends/pytorch/src/op/roll.cpp
+++ b/src/frontends/pytorch/src/op/roll.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_roll(NodeContext& context) {
+OutputVector translate_roll(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     const auto data = context.get_input(0);
     const auto shifts = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/rsqrt.cpp b/src/frontends/pytorch/src/op/rsqrt.cpp
index 9e9ba9330c87ca..d4f56040da27c5 100644
--- a/src/frontends/pytorch/src/op/rsqrt.cpp
+++ b/src/frontends/pytorch/src/op/rsqrt.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_rsqrt(NodeContext& context) {
+OutputVector translate_rsqrt(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto data = context.get_input(0);
     auto input_shape = context.mark_node(std::make_shared(data, element::i32));
diff --git a/src/frontends/pytorch/src/op/rsub.cpp b/src/frontends/pytorch/src/op/rsub.cpp
index 21b109e9037182..200094b6eecede 100644
--- a/src/frontends/pytorch/src/op/rsub.cpp
+++ b/src/frontends/pytorch/src/op/rsub.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_rsub(NodeContext& context) {
+OutputVector translate_rsub(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto self = context.get_input(0);
     auto other = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/select.cpp b/src/frontends/pytorch/src/op/select.cpp
index c6d7cb0048f325..ea5255f2410ffa 100644
--- a/src/frontends/pytorch/src/op/select.cpp
+++ b/src/frontends/pytorch/src/op/select.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_select(NodeContext& context) {
+OutputVector translate_select(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
     auto const_minus_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
diff --git a/src/frontends/pytorch/src/op/selu.cpp b/src/frontends/pytorch/src/op/selu.cpp
index 9ec08af77facc8..aef54491e74bd1 100644
--- a/src/frontends/pytorch/src/op/selu.cpp
+++ b/src/frontends/pytorch/src/op/selu.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_selu(NodeContext& context) {
+OutputVector translate_selu(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     auto alpha = context.mark_node(v0::Constant::create(element::f64, Shape{}, {1.6732632423543772848170429916717}));
diff --git a/src/frontends/pytorch/src/op/set_item.cpp b/src/frontends/pytorch/src/op/set_item.cpp
index 9ce33fce24e8d2..ef11a2a391c39a 100644
--- a/src/frontends/pytorch/src/op/set_item.cpp
+++ b/src/frontends/pytorch/src/op/set_item.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_set_item(NodeContext& context) {
+OutputVector translate_set_item(const NodeContext& context) {
     // schema: aten::_set_item.t(t[](a!) l, int idx, t(b -> *) el) -> t[](a!)
     // _set_item inserts element in list
     num_inputs_check(context, 3, 3);
diff --git a/src/frontends/pytorch/src/op/size.cpp b/src/frontends/pytorch/src/op/size.cpp
index a4d70cef19ad2c..289facd0fe7f44 100644
--- a/src/frontends/pytorch/src/op/size.cpp
+++ b/src/frontends/pytorch/src/op/size.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_size(NodeContext& context) {
+OutputVector translate_size(const NodeContext& context) {
     num_inputs_check(context, 1, 2);
     auto shape = context.mark_node(std::make_shared(context.get_input(0), element::i32));
     if (context.input_is_none(1)) {
diff --git a/src/frontends/pytorch/src/op/slice.cpp b/src/frontends/pytorch/src/op/slice.cpp
index 756efc1590b796..391b1c834fd535 100644
--- a/src/frontends/pytorch/src/op/slice.cpp
+++ b/src/frontends/pytorch/src/op/slice.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_slice(NodeContext& context) {
+OutputVector translate_slice(const NodeContext& context) {
     // aten::slice.t(t[] l, int? start=None, int? end=None, int step=1) -> (t[])
     // aten::slice.Tensor(Tensor(a) self, int dim=0, int? start=None, int? end=None, int step=1) -> (Tensor(a))
     ov::Output dim;
diff --git a/src/frontends/pytorch/src/op/softmax.cpp b/src/frontends/pytorch/src/op/softmax.cpp
index 1b94a3560ad972..10c3afea7cda0b 100644
--- a/src/frontends/pytorch/src/op/softmax.cpp
+++ b/src/frontends/pytorch/src/op/softmax.cpp
@@ -13,7 +13,7 @@ namespace pytorch {
 namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_softmax(NodeContext& context) {
+OutputVector translate_softmax(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
     auto axis = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/sort.cpp b/src/frontends/pytorch/src/op/sort.cpp
index c0e54d54d9be2a..715a7a52d3efe8 100644
--- a/src/frontends/pytorch/src/op/sort.cpp
+++ b/src/frontends/pytorch/src/op/sort.cpp
@@ -9,7 +9,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_sort(NodeContext& context) {
+OutputVector translate_sort(const NodeContext& context) {
     num_inputs_check(context, 3, 4);
     const auto input_tensor = context.get_input(0);
     bool stable, descending;
@@ -40,7 +40,7 @@ OutputVector translate_sort(NodeContext& context) {
     return topk->outputs();
 };
 
-OutputVector translate_argsort(NodeContext& context) {
+OutputVector translate_argsort(const NodeContext& context) {
     auto sort = translate_sort(context);
     return {sort[1]};
 };
diff --git a/src/frontends/pytorch/src/op/square.cpp b/src/frontends/pytorch/src/op/square.cpp
index 7194aafd9abb36..2310fda75aa574 100644
--- a/src/frontends/pytorch/src/op/square.cpp
+++ b/src/frontends/pytorch/src/op/square.cpp
@@ -14,7 +14,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_square(NodeContext& context) {
+OutputVector translate_square(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto input_0 = context.get_input(0);
     auto const_2 = context.mark_node(v0::Constant::create(input_0.get_element_type(), Shape{1}, {2}));
diff --git a/src/frontends/pytorch/src/op/squeeze.cpp b/src/frontends/pytorch/src/op/squeeze.cpp
index dacf2c55a4d596..fb15801367a564 100644
--- a/src/frontends/pytorch/src/op/squeeze.cpp
+++ b/src/frontends/pytorch/src/op/squeeze.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_squeeze(NodeContext& context) {
+OutputVector translate_squeeze(const NodeContext& context) {
     num_inputs_check(context, 1, 2);
     auto x = context.get_input(0);
     if (context.input_is_none(1)) {
diff --git a/src/frontends/pytorch/src/op/sub.cpp b/src/frontends/pytorch/src/op/sub.cpp
index fd449c12bbd2d3..94963ed9bdb61f 100644
--- a/src/frontends/pytorch/src/op/sub.cpp
+++ b/src/frontends/pytorch/src/op/sub.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_sub(NodeContext& context) {
+OutputVector translate_sub(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/sum.cpp b/src/frontends/pytorch/src/op/sum.cpp
index 3dc4601b1083a9..7a87dc0c507f6c 100644
--- a/src/frontends/pytorch/src/op/sum.cpp
+++ b/src/frontends/pytorch/src/op/sum.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_sum(NodeContext& context) {
+OutputVector translate_sum(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     bool keep_dims = false;
     ov::Output axes;
diff --git a/src/frontends/pytorch/src/op/to.cpp b/src/frontends/pytorch/src/op/to.cpp
index 6e5b0ebda639c4..2499b8346f5f02 100644
--- a/src/frontends/pytorch/src/op/to.cpp +++ b/src/frontends/pytorch/src/op/to.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_to(NodeContext& context) { +OutputVector translate_to(const NodeContext& context) { int dtype_idx; int memory_format_idx; if (context.get_input_size() == 5) { diff --git a/src/frontends/pytorch/src/op/topk.cpp b/src/frontends/pytorch/src/op/topk.cpp index 26addb856c6445..06916c4ea03e2f 100644 --- a/src/frontends/pytorch/src/op/topk.cpp +++ b/src/frontends/pytorch/src/op/topk.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_topk(NodeContext& context) { +OutputVector translate_topk(const NodeContext& context) { num_inputs_check(context, 5, 5); const auto input_tensor = context.get_input(0); const auto largest = context.const_input(3); diff --git a/src/frontends/pytorch/src/op/transpose.cpp b/src/frontends/pytorch/src/op/transpose.cpp index 60fee576613374..9a6cddb3ffb896 100644 --- a/src/frontends/pytorch/src/op/transpose.cpp +++ b/src/frontends/pytorch/src/op/transpose.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_transpose(NodeContext& context) { +OutputVector translate_transpose(const NodeContext& context) { num_inputs_check(context, 3, 3); auto dim0 = context.const_input(1); auto dim1 = context.const_input(2); diff --git a/src/frontends/pytorch/src/op/trilu.cpp b/src/frontends/pytorch/src/op/trilu.cpp index 1726cf2f895956..1ef4d50fd6affa 100644 --- a/src/frontends/pytorch/src/op/trilu.cpp +++ b/src/frontends/pytorch/src/op/trilu.cpp @@ -60,11 +60,11 @@ OutputVector translate_base_triu_tril(const NodeContext& context, bool upper) { } }; // namespace -OutputVector translate_triu(NodeContext& context) { +OutputVector translate_triu(const NodeContext& context) { return translate_base_triu_tril(context, true); }; -OutputVector translate_tril(NodeContext& context) { +OutputVector translate_tril(const NodeContext& context) { return translate_base_triu_tril(context, false); }; diff --git a/src/frontends/pytorch/src/op/unfold.cpp b/src/frontends/pytorch/src/op/unfold.cpp index 949f7991391b7e..e7aa129b2935cc 100644 --- a/src/frontends/pytorch/src/op/unfold.cpp +++ b/src/frontends/pytorch/src/op/unfold.cpp @@ -13,7 +13,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_unfold(NodeContext& context) { +OutputVector translate_unfold(const NodeContext& context) { num_inputs_check(context, 4, 4); // constants auto const_0 = context.mark_node(Constant::create(element::i32, Shape{}, {0})); diff --git a/src/frontends/pytorch/src/op/upsample.cpp b/src/frontends/pytorch/src/op/upsample.cpp index 111a07a28c70e9..484387b8f3931d 100644 --- a/src/frontends/pytorch/src/op/upsample.cpp +++ b/src/frontends/pytorch/src/op/upsample.cpp @@ -69,32 +69,32 @@ OutputVector base_translate_upsample(const NodeContext& context, }; } // namespace -OutputVector translate_upsample_linear1d(NodeContext& context) { +OutputVector translate_upsample_linear1d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 1); }; -OutputVector translate_upsample_bilinear2d(NodeContext& context) { +OutputVector translate_upsample_bilinear2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 2); }; -OutputVector translate_upsample_trilinear3d(NodeContext& context) { +OutputVector translate_upsample_trilinear3d(const NodeContext& context) { return 
base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 3); }; -OutputVector translate_upsample_nearest1d(NodeContext& context) { +OutputVector translate_upsample_nearest1d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 1); }; -OutputVector translate_upsample_nearest2d(NodeContext& context) { +OutputVector translate_upsample_nearest2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 2); }; -OutputVector translate_upsample_nearest3d(NodeContext& context) { +OutputVector translate_upsample_nearest3d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 3); }; // bicubic is only supported for 2d in pytorch -OutputVector translate_upsample_bicubic2d(NodeContext& context) { +OutputVector translate_upsample_bicubic2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::CUBIC, 2); }; diff --git a/src/frontends/pytorch/src/op/var_mean.cpp b/src/frontends/pytorch/src/op/var_mean.cpp index 936038fecdcc2c..f021161722cd39 100644 --- a/src/frontends/pytorch/src/op/var_mean.cpp +++ b/src/frontends/pytorch/src/op/var_mean.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_var_mean(NodeContext& context) { +OutputVector translate_var_mean(const NodeContext& context) { num_inputs_check(context, 1, 4); auto data = context.get_input(0); bool unbiased = true; @@ -75,7 +75,7 @@ OutputVector translate_var_mean(NodeContext& context) { return {var, mean}; }; -OutputVector translate_var(NodeContext& context) { +OutputVector translate_var(const NodeContext& context) { auto res = translate_var_mean(context); return {res[0]}; } diff --git a/src/frontends/pytorch/src/op/where.cpp b/src/frontends/pytorch/src/op/where.cpp index 454d23938a2c0c..4a9de9f69edab8 100644 --- a/src/frontends/pytorch/src/op/where.cpp +++ b/src/frontends/pytorch/src/op/where.cpp @@ -14,7 +14,7 @@ namespace op { using namespace ov::op; -OutputVector translate_where(NodeContext& context) { +OutputVector translate_where(const NodeContext& context) { num_inputs_check(context, 1, 3); auto cond = context.get_input(0); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(1), "aten::where(cond) unsupported"); diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index b3e54233f50feb..c42024fa36f4df 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -#define OP_CONVERTER(op) OutputVector op(NodeContext& node) +#define OP_CONVERTER(op) OutputVector op(const NodeContext& node) OP_CONVERTER(translate_adaptive_avg_pool3d); OP_CONVERTER(translate_adaptive_max_pool2d); @@ -130,7 +130,7 @@ OP_CONVERTER(translate_zeros_like); } // namespace op -const std::map get_supported_ops() { +const std::map get_supported_ops() { return { {"aten::__and__", op::translate_1to1_match_2_inputs}, // TODO: cover numerical cases {"aten::__getitem__", op::translate_getitem}, diff --git a/src/frontends/pytorch/src/op_table.hpp b/src/frontends/pytorch/src/op_table.hpp index 7a67c9101578b0..e15a988e98175b 100644 --- a/src/frontends/pytorch/src/op_table.hpp +++ b/src/frontends/pytorch/src/op_table.hpp @@ -10,7 +10,7 @@ namespace ov { namespace frontend { namespace pytorch { -const std::map get_supported_ops(); +const std::map 
get_supported_ops(); } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 025a325eebf939..376b466c0a268c 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -20,7 +20,7 @@ namespace pytorch { using namespace ov::op; TranslateSession::TranslateSession(const ov::frontend::InputModel::Ptr& input_model, - const std::map& translator_map) + const std::map& translator_map) : m_input_model(input_model), m_translator_map(translator_map), m_ov_model(nullptr) {} @@ -45,9 +45,9 @@ std::shared_ptr TranslateSession::convert_pytorch_model( const std::unordered_map& external_descriptors) { std::shared_ptr resulting_model; // define here to make a conversion in a nested scope { - ParameterVector parameters; - TensorMap tensor_map; // tensor map of the current context - std::set mutated_tensors; + auto parameters = std::make_shared(); + auto tensor_map = std::make_shared(); // tensor map of the current context + auto mutated_tensors = std::make_shared>(); // Go over all pytorch_model inputs and register them in the tensor map: auto inputs = pytorch_model->inputs(); @@ -74,7 +74,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( if (!input_node) { auto parameter = std::make_shared(type, pshape); encode_tensor_name(parameter->output(0), inputs.at(i), pytorch_model->get_input_debug_name(i)); - parameters.push_back(parameter); + parameters->push_back(parameter); input_node = parameter; auto order = pytorch_model->get_input_transpose_order(i); if (order.size() > 0 && !std::is_sorted(order.begin(), order.end())) { @@ -91,7 +91,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( input_node = transpose; } } - tensor_map[inputs.at(i)] = input_node; + (*tensor_map)[inputs.at(i)] = input_node; } auto node_visitor = [&](std::shared_ptr node) { @@ -102,7 +102,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( auto raw_inputs = node->inputs(); for (size_t i = 0; i < raw_inputs.size(); ++i) { auto input = raw_inputs.at(i); - if (tensor_map.find(input) == tensor_map.end()) { + if (tensor_map->find(input) == tensor_map->end()) { // Input refers value in the outer scope, need to create a new Parameter in the current scope // Linkage to external scope will be performed on the level of the parent operation (if or loop) // TODO: Eliminate duplication with the main code for Parameters creation @@ -111,18 +111,15 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // TODO: Use special API to set custom type specification auto parameter = std::make_shared(element::dynamic, ps); // TODO: Missing get_input_transpose_order handling for not trivial layouts - tensor_map[input] = parameter; + (*tensor_map)[input] = parameter; // set name of parameter to the index of node in the model encode_tensor_name(parameter->output(0), input); - parameters.push_back(parameter); + parameters->push_back(parameter); } } - auto context = NodeContext(node, &tensor_map, ¶meters, external_tensor_map, this); + auto context = NodeContext(node, external_tensor_map, tensor_map, parameters, mutated_tensors, this); auto converted_outputs = convert_node(context); - auto mutated_t = context.get_mutated_tensors(); - mutated_tensors.insert(mutated_t.begin(), mutated_t.end()); - auto fw_outputs = node->outputs(); // Ops with subgraphs or with mutated inputs may have more outputs after conversion compared to pytorch ones 
FRONT_END_OP_CONVERSION_CHECK(fw_outputs.size() <= converted_outputs.size(), @@ -134,10 +131,10 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // FIXME: Now it is not true for at least prim::Constant for (size_t i = 0; i < fw_outputs.size(); ++i) { size_t fw_tensor_id = node->output(i); - FRONT_END_GENERAL_CHECK(tensor_map.find(fw_tensor_id) == tensor_map.end(), + FRONT_END_GENERAL_CHECK(tensor_map->find(fw_tensor_id) == tensor_map->end(), "Duplicated producer for PT value with unique ID: ", fw_tensor_id); - tensor_map[fw_tensor_id] = converted_outputs[i]; + (*tensor_map)[fw_tensor_id] = converted_outputs[i]; encode_tensor_name(converted_outputs[i], fw_tensor_id, node->get_output_debug_name(i)); } }; @@ -148,14 +145,14 @@ std::shared_ptr TranslateSession::convert_pytorch_model( ResultVector results; for (size_t i = 0; i < pytorch_model->num_of_outputs(); ++i) { size_t id = pytorch_model->output(i); - if (tensor_map.find(id) == tensor_map.end()) { + if (tensor_map->find(id) == tensor_map->end()) { // Not found in this scope, adding Parameter to connect to external scope auto parameter = std::make_shared(element::dynamic, PartialShape::dynamic()); encode_tensor_name(parameter->output(0), id); - parameters.push_back(parameter); - tensor_map[id] = parameter; + parameters->push_back(parameter); + (*tensor_map)[id] = parameter; } - auto ov_output = tensor_map[id]; + auto ov_output = tensor_map->at(id); auto order = pytorch_model->get_output_transpose_order(i); FRONT_END_GENERAL_CHECK(order.size() == 0 || std::is_sorted(order.begin(), order.end()), "Output strides have wrong order."); @@ -168,32 +165,32 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // Since parameters can be added we need to list all current parameters std::set param_names; - for (const auto& param : parameters) { + for (const auto& param : *parameters) { auto input_idx = decode_tensor_name(param->output(0)); param_names.insert(input_idx); } - for (const auto& tensor_id : mutated_tensors) { + for (const auto& tensor_id : *mutated_tensors) { if (param_names.count(tensor_id)) { - FRONT_END_GENERAL_CHECK(tensor_map.count(tensor_id), + FRONT_END_GENERAL_CHECK(tensor_map->count(tensor_id), "Tensor with id: ", tensor_id, " doesn't exist in tensor map."); // model input was mutated we need to make a result for it - auto mutated_tensor = tensor_map.at(tensor_id); + auto mutated_tensor = tensor_map->at(tensor_id); // empty external_tensor_map means this is main body of the model and we don't want to create // additional outputs in that case. 
if (mutated_tensor.get_target_inputs().empty() && !external_tensor_map.empty()) - results.push_back(std::make_shared(tensor_map.at(tensor_id))); + results.push_back(std::make_shared(tensor_map->at(tensor_id))); } } - resulting_model = std::make_shared(results, parameters); + resulting_model = std::make_shared(results, *parameters); // Did a conversion in a nested scope to automatically remove any holders of nodes except those in the graph } return resulting_model; } -OutputVector TranslateSession::convert_node(NodeContext& context) { +OutputVector TranslateSession::convert_node(const NodeContext& context) { try { auto it = m_translator_map.find(context.get_op_type()); if (it != m_translator_map.end()) { diff --git a/src/frontends/pytorch/src/translate_session.hpp b/src/frontends/pytorch/src/translate_session.hpp index e33ea31c63091d..4931c274984485 100644 --- a/src/frontends/pytorch/src/translate_session.hpp +++ b/src/frontends/pytorch/src/translate_session.hpp @@ -17,7 +17,7 @@ namespace pytorch { class TranslateSession { public: TranslateSession(const frontend::InputModel::Ptr& input_model, - const std::map& translator_map); + const std::map& translator_map); std::shared_ptr get_converted_model(); std::shared_ptr translate_graph(const frontend::InputModel::Ptr& input_model); @@ -38,10 +38,10 @@ class TranslateSession { size_t m_friendly_name_counter = 0; private: - OutputVector convert_node(NodeContext& context); + OutputVector convert_node(const NodeContext& context); const frontend::InputModel::Ptr m_input_model; - const std::map& m_translator_map; + const std::map& m_translator_map; std::shared_ptr m_ov_model; std::map>> m_counter_map; diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index dd7d1dad5db255..bdae3e9e75e397 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -177,7 +177,7 @@ std::shared_ptr concat_list_construct(std::shared_ptr input) { return input; } -OutputVector make_framework_node(NodeContext& context) { +OutputVector make_framework_node(const NodeContext& context) { auto schema = context.get_schema(); // TODO: properly process schema to get the actual position of mutable input // Hack. Can indicate mutable inputs, but can it be reliable? 
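Taken together, the changes above make every conversion rule take a const `NodeContext`, with shared state (tensor map, parameters, mutated tensors) held behind shared_ptr members. For reference, a minimal rule in the new style looks roughly like the sketch below; it is illustrative only — `translate_example` is a made-up name, while `num_inputs_check`, `get_input` and `mark_node` are the helpers shown in the diffs above, and the sketch assumes it lives inside the ov::frontend::pytorch::op namespace:

    #include <memory>

    #include "openvino/op/relu.hpp"
    #include "utils.hpp"

    // A conversion rule in the const-correct style: the rule only reads from
    // the context; bookkeeping such as the tensor map and the mutated-tensor
    // set is updated through the shared_ptr members NodeContext now owns.
    OutputVector translate_example(const NodeContext& context) {
        num_inputs_check(context, 1, 1);  // validate the expected arity
        auto x = context.get_input(0);    // read-only access to inputs
        return {context.mark_node(std::make_shared<ov::op::v0::Relu>(x))};
    }
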
diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index 07235a2152d8b0..029b349c77bac2 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -48,7 +48,7 @@ op::PadType convert_pad(const std::string& pt_pad); std::shared_ptr concat_list_construct(std::shared_ptr input); -OutputVector make_framework_node(NodeContext& context); +OutputVector make_framework_node(const NodeContext& context); std::shared_ptr cast_fw_node(std::shared_ptr node, const std::string& type); @@ -63,8 +63,8 @@ void align_eltwise_input_types(const NodeContext& context, std::deque> get_list_as_outputs(const Output& start); namespace op { -template -OutputVector inplace_op(NodeContext& context) { +template +OutputVector inplace_op(const NodeContext& context) { auto translation_res = T(context); FRONT_END_OP_CONVERSION_CHECK(translation_res.size() == 1, "inplace_op function must be used on single output translators"); @@ -73,21 +73,21 @@ OutputVector inplace_op(NodeContext& context) { } template -OutputVector translate_1to1_match_1_inputs(NodeContext& context) { +OutputVector translate_1to1_match_1_inputs(const NodeContext& context) { num_inputs_check(context, 1, 1); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0), "Input should not be None."); return {context.mark_node(std::make_shared(context.get_input(0)))}; } template -OutputVector translate_1to1_match_2_inputs(NodeContext& context) { +OutputVector translate_1to1_match_2_inputs(const NodeContext& context) { num_inputs_check(context, 2, 2); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); return {context.mark_node(std::make_shared(context.get_input(0), context.get_input(1)))}; } template -OutputVector translate_1to1_match_2_inputs_align_types(NodeContext& context) { +OutputVector translate_1to1_match_2_inputs_align_types(const NodeContext& context) { num_inputs_check(context, 2, 2); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); auto lhs = context.get_input(0); @@ -96,11 +96,11 @@ OutputVector translate_1to1_match_2_inputs_align_types(NodeContext& context) { return {context.mark_node(std::make_shared(lhs, rhs))}; } -inline OutputVector return_false_scalar(NodeContext& context) { +inline OutputVector return_false_scalar(const NodeContext& context) { return {context.mark_node(ov::op::v0::Constant::create(element::boolean, Shape{}, {false}))}; } -inline OutputVector skip_node(NodeContext& context) { +inline OutputVector skip_node(const NodeContext& context) { return {context.get_input(0).get_node_shared_ptr()}; } From 98237b06b5b61a4df0b4ec2789a3cbb8804674fc Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 21 Mar 2023 08:52:52 +0400 Subject: [PATCH 002/296] [GPU] Update memory_statistics property impl (#16399) --- .../include/intel_gpu/plugin/plugin.hpp | 6 +--- src/plugins/intel_gpu/src/plugin/plugin.cpp | 29 ++----------------- src/plugins/intel_gpu/src/runtime/engine.cpp | 1 + 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 36fad099a90320..9fad8f53b6a164 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -23,17 +23,13 @@ class Plugin : public InferenceEngine::IInferencePlugin { // key: device_id, value: cldnn device 
std::map device_map; std::map m_configs_map; - // key: cldnn context, value: memory statistics - mutable std::map> statistics_map; - mutable std::mutex engine_mutex; - mutable std::map m_default_contexts; + std::map m_default_contexts; InferenceEngine::CNNNetwork clone_and_transform_model(const InferenceEngine::CNNNetwork& network, const ExecutionConfig& config) const; void transform_model(std::shared_ptr& model, const ExecutionConfig& config) const; void register_primitives(); - void update_memory_statistics(const RemoteContextImpl::Ptr& context) const; std::string get_device_id_from_config(const std::map& config) const; std::string get_device_id(const std::map& config) const; RemoteCLContext::Ptr get_default_context(const std::string& device_id) const; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index ac366d192aa361..e68a9094f221dc 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -175,18 +175,6 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) { } }; -void Plugin::update_memory_statistics(const RemoteContextImpl::Ptr& context) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::update_memory_statistics"); - { - std::lock_guard lock(engine_mutex); - - // if the same context exists, the statistics is replaced with the latest one - // (currently, memory usage is accumulated for several networks in the same context) - // if it does not exist, a new statistics is added - statistics_map[context] = context->get_engine().get_memory_statistics(); - } -} - IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map &orig_config) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl"); @@ -208,7 +196,6 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork"); CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, config); - update_memory_statistics(context->get_impl()); return exeNetwork; } } @@ -542,7 +529,6 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istr exeNetwork->setNetworkOutputs(outputs); exeNetwork->setInputs(new_params); exeNetwork->setOutputs(new_results); - update_memory_statistics(context->get_impl()); return exeNetwork; } } @@ -672,19 +658,8 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map statistics; - for (auto const &item : statistics_map) { - // Before collecting memory statistics of each context, it's updated with the latest memory statistics from engine. 
- update_memory_statistics(item.first); - for (auto const &kv : item.second) { - if (!statistics.count(kv.first)) { - statistics[kv.first] = kv.second; - } else { - statistics[kv.first] += kv.second; - } - } - } - return decltype(ov::intel_gpu::memory_statistics)::value_type {statistics}; + const auto& ctx = get_default_context(device_id)->get_impl(); + return decltype(ov::intel_gpu::memory_statistics)::value_type {ctx->get_engine().get_memory_statistics()}; } else if (name == METRIC_KEY(MAX_BATCH_SIZE) || name == ov::max_batch_size) { return decltype(ov::max_batch_size)::value_type {static_cast(get_max_batch_size(options))}; diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index ad5cc79cca3a26..9cabc43e876257 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -215,6 +215,7 @@ uint64_t engine::get_used_device_memory(allocation_type type) const { } std::map engine::get_memory_statistics() const { + std::lock_guard guard(_mutex); std::map statistics; for (auto const& m : _memory_usage_map) { std::ostringstream oss; From 5cb20f8858c60c016f404627a3768b8a387a709e Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 21 Mar 2023 08:54:48 +0400 Subject: [PATCH 003/296] [TF FE] Refactor StridedSlice translator and add layer test to precommit (#16376) Signed-off-by: Kazantsev, Roman --- .../src/op/strided_slice.cpp | 57 ++++++---- .../tensorflow_tests/test_tf_StridedSlice.py | 100 ++++++++++-------- 2 files changed, 88 insertions(+), 69 deletions(-) diff --git a/src/frontends/tensorflow_common/src/op/strided_slice.cpp b/src/frontends/tensorflow_common/src/op/strided_slice.cpp index e0e7e6761bd8f9..e71097557e7e44 100644 --- a/src/frontends/tensorflow_common/src/op/strided_slice.cpp +++ b/src/frontends/tensorflow_common/src/op/strided_slice.cpp @@ -16,42 +16,55 @@ namespace tensorflow { namespace op { OutputVector translate_strided_slice_op(const NodeContext& node) { + default_op_checks(node, 4, {"StridedSlice", "STRIDED_SLICE"}); auto input = node.get_input(0); auto begin = node.get_input(1); auto end = node.get_input(2); auto strides = node.get_input(3); - auto begin_mask = node.get_attribute("begin_mask", 0); - auto end_mask = node.get_attribute("end_mask", 0); - auto new_axis_mask = node.get_attribute("new_axis_mask", 0); - auto ellipsis_mask = node.get_attribute("ellipsis_mask", 0); - auto shrink_axis_mask = node.get_attribute("shrink_axis_mask", 0); - auto mask_to_vector = [](int64_t mask) { - size_t length = sizeof(mask) * CHAR_BIT; - vector vec(length, 0); if (mask == 0) { - return vec; + return vector{}; } - for (size_t i = 0; i < length; ++i) { - if (static_cast(mask >> i & 0x1) == 1) { + size_t max_length = sizeof(mask) * CHAR_BIT; + vector vec{}; + for (size_t i = 0; i < max_length; ++i) { + if ((mask >> i & 0x1) == 1) { + // resize the vector by appending with required number of zeros + vec.resize(i + 1, 0); vec[i] = 1; } } return vec; }; - auto res = make_shared(input, - begin, - end, - strides, - mask_to_vector(begin_mask), - mask_to_vector(end_mask), - mask_to_vector(new_axis_mask), - mask_to_vector(shrink_axis_mask), - mask_to_vector(ellipsis_mask)); - set_node_name(node.get_name(), res); - return res->outputs(); + // retrieve attributes for StridedSlice operation + auto begin_mask = mask_to_vector(node.get_attribute("begin_mask", 0)); + auto end_mask = mask_to_vector(node.get_attribute("end_mask", 0)); + auto new_axis_mask = 
mask_to_vector(node.get_attribute("new_axis_mask", 0)); + auto ellipsis_mask = mask_to_vector(node.get_attribute("ellipsis_mask", 0)); + auto shrink_axis_mask = mask_to_vector(node.get_attribute("shrink_axis_mask", 0)); + + // the masks can be of different length and we need to align them by the maximum length + size_t max_length = std::max( + {begin_mask.size(), end_mask.size(), new_axis_mask.size(), ellipsis_mask.size(), shrink_axis_mask.size()}); + begin_mask.resize(max_length, 0); + end_mask.resize(max_length, 0); + new_axis_mask.resize(max_length, 0); + ellipsis_mask.resize(max_length, 0); + shrink_axis_mask.resize(max_length, 0); + + auto strided_slice = make_shared(input, + begin, + end, + strides, + begin_mask, + end_mask, + new_axis_mask, + shrink_axis_mask, + ellipsis_mask); + set_node_name(node.get_name(), strided_slice); + return {strided_slice}; } } // namespace op diff --git a/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py b/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py index dac01d739186e0..eb8afc65019cda 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py @@ -7,61 +7,72 @@ class TestStridedSlice(CommonTFLayerTest): - - @staticmethod - def create_strided_slice_net(input_shape, begin, end, strides, begin_mask, end_mask, + def create_strided_slice_net(self, input_shape, begin_value, end_value, strides_value, begin_mask, end_mask, ellipsis_mask, - new_axis_mask, shrink_axis_mask, ir_version, use_new_frontend): - + new_axis_mask, shrink_axis_mask): import tensorflow as tf - tf.compat.v1.reset_default_graph() with tf.compat.v1.Session() as sess: - input_node = tf.compat.v1.placeholder(tf.float32, input_shape, 'Input') - strided_slice = tf.compat.v1.strided_slice(input_node, begin=begin, end=end, - strides=strides, - begin_mask=begin_mask, end_mask=end_mask, - ellipsis_mask=ellipsis_mask, - new_axis_mask=new_axis_mask, - shrink_axis_mask=shrink_axis_mask) + input = tf.compat.v1.placeholder(tf.float32, input_shape, 'Input') + begin = tf.constant(begin_value, dtype=tf.int32) + end = tf.constant(end_value, dtype=tf.int32) + strides = tf.constant(strides_value, dtype=tf.int32) + tf.raw_ops.StridedSlice(input=input, begin=begin, end=end, strides=strides, begin_mask=begin_mask, + end_mask=end_mask, ellipsis_mask=ellipsis_mask, new_axis_mask=new_axis_mask, + shrink_axis_mask=shrink_axis_mask) tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def - ref_net = None - return tf_net, ref_net + return tf_net, None + + test_basic_data = [ + dict(input_shape=[2, 5, 4, 3], begin_value=[1, 0, 2, 0], end_value=[2, 5, 4, 2], strides_value=[1, 2, 1, 1], + begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), + dict(input_shape=[1, 5, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 5, 3], strides_value=[1, 2, 3, 1], + begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=8, shrink_axis_mask=0), + dict(input_shape=[3, 4, 5, 7], begin_value=[2, 0, 3], end_value=[3, 0, 6], strides_value=[1, 1, 1], + begin_mask=6, end_mask=6, ellipsis_mask=2, new_axis_mask=0, shrink_axis_mask=1), + ] + + @pytest.mark.parametrize('params', test_basic_data) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_strided_slice_basic(self, params, ie_device, precision, ir_version, + temp_dir, use_new_frontend, use_old_api): + self._test(*self.create_strided_slice_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + 
use_new_frontend=use_new_frontend, use_old_api=use_old_api) test_squeeze_data = [ - dict(input_shape=[1, 5], begin=[0, 0], end=[1, 5], strides=[1, 1], begin_mask=0, + dict(input_shape=[1, 5], begin_value=[0, 0], end_value=[1, 5], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), - dict(input_shape=[5, 1], begin=[0, 0], end=[5, 1], strides=[1, 1], begin_mask=0, + dict(input_shape=[5, 1], begin_value=[0, 0], end_value=[5, 1], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=2), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), - dict(input_shape=[1, 1, 3], begin=[0, 0, 0], end=[1, 1, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 1, 3], begin_value=[0, 0, 0], end_value=[1, 1, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=2), - dict(input_shape=[1, 5, 1], begin=[0, 0, 0], end=[1, 5, 1], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 1], begin_value=[0, 0, 0], end_value=[1, 5, 1], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=4), - pytest.param(dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], - begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), - marks=pytest.mark.precommit_tf_fe), - dict(input_shape=[1, 1, 5, 3], begin=[0, 0, 0, 0], end=[1, 1, 5, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 1, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 1, 5, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=2), - dict(input_shape=[1, 5, 1, 3], begin=[0, 0, 0, 0], end=[1, 5, 1, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 1, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 1, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=4), - dict(input_shape=[1, 5, 5, 1], begin=[0, 0, 0, 0], end=[1, 5, 1, 1], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 5, 1], begin_value=[0, 0, 0, 0], end_value=[1, 5, 1, 1], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=8), - dict(input_shape=[1, 1, 5, 5, 3], begin=[0, 0, 0, 0, 0], end=[1, 1, 5, 5, 3], - strides=[1, 1, 1, 1, 1], + dict(input_shape=[1, 1, 5, 5, 3], begin_value=[0, 0, 0, 0, 0], end_value=[1, 1, 5, 5, 3], + strides_value=[1, 1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=3), - dict(input_shape=[1, 5, 1, 5, 3], begin=[0, 0, 0, 0, 0], end=[1, 5, 1, 5, 3], - strides=[1, 1, 1, 1, 1], + dict(input_shape=[1, 5, 1, 5, 3], begin_value=[0, 0, 0, 0, 0], end_value=[1, 5, 1, 5, 3], + strides_value=[1, 1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=5), - dict(input_shape=[1, 5, 1, 5, 1], begin=[0, 0, 0, 0, 0], end=[1, 5, 1, 5, 1], - strides=[1, 1, 1, 1, 1], + dict(input_shape=[1, 5, 1, 5, 1], begin_value=[0, 0, 0, 0, 0], end_value=[1, 5, 1, 5, 1], + strides_value=[1, 1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=21), ] @@ -69,32 +80,28 @@ def create_strided_slice_net(input_shape, begin, end, strides, begin_mask, end_m @pytest.mark.nightly 
def test_strided_slice_replace_with_squeeze(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): - self._test(*self.create_strided_slice_net(**params, ir_version=ir_version, - use_new_frontend=use_new_frontend), + self._test(*self.create_strided_slice_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_new_frontend=use_new_frontend, use_old_api=use_old_api) test_unsqueeze_data = [ - dict(input_shape=[1, 5], begin=[0, 0], end=[1, 5], strides=[1, 1], begin_mask=0, + dict(input_shape=[1, 5], begin_value=[0, 0], end_value=[1, 5], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=1, shrink_axis_mask=0), - dict(input_shape=[1, 5], begin=[0, 0], end=[1, 5], strides=[1, 1], begin_mask=0, + dict(input_shape=[1, 5], begin_value=[0, 0], end_value=[1, 5], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=3, shrink_axis_mask=0), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=3, shrink_axis_mask=0), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=4, shrink_axis_mask=0), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=5, shrink_axis_mask=0), - dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], - begin_mask=0, - end_mask=0, ellipsis_mask=0, new_axis_mask=8, shrink_axis_mask=0), - dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 5, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=4, shrink_axis_mask=0), - dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 5, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=2, shrink_axis_mask=0), - dict(input_shape=[16, 4, 64], begin=[0, 0, 0, 0], end=[0, 0, 0, 0], strides=[1, 1, 1, 1], + dict(input_shape=[16, 4, 64], begin_value=[0, 0, 0, 0], end_value=[0, 0, 0, 0], strides_value=[1, 1, 1, 1], begin_mask=19, end_mask=19, ellipsis_mask=0, new_axis_mask=12, shrink_axis_mask=0), ] @@ -103,7 +110,6 @@ def test_strided_slice_replace_with_squeeze(self, params, ie_device, precision, @pytest.mark.nightly def test_strided_slice_replace_with_unsqueeze(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): - self._test(*self.create_strided_slice_net(**params, ir_version=ir_version, - use_new_frontend=use_new_frontend), + self._test(*self.create_strided_slice_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_new_frontend=use_new_frontend, use_old_api=use_old_api) From 60436dee5ab9a5598656ba60181e53dc51283bb7 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 21 Mar 2023 10:52:45 +0400 Subject: [PATCH 004/296] Updated AsyncInferRequest documentation + leftovers 
(#16420) --- docs/IE_PLUGIN_DG/AsyncInferRequest.md | 42 ++++++++++++-------------- docs/IE_PLUGIN_DG/CompiledModel.md | 2 +- docs/IE_PLUGIN_DG/InferRequest.md | 4 +-- docs/IE_PLUGIN_DG/Intro.md | 6 ++-- docs/IE_PLUGIN_DG/Plugin.md | 2 +- docs/IE_PLUGIN_DG/PluginTesting.md | 2 +- docs/IE_PLUGIN_DG/QuantizedNetworks.md | 8 ++--- docs/IE_PLUGIN_DG/detailed_guides.md | 4 +-- 8 files changed, 33 insertions(+), 37 deletions(-) diff --git a/docs/IE_PLUGIN_DG/AsyncInferRequest.md b/docs/IE_PLUGIN_DG/AsyncInferRequest.md index d8f45d528b22c9..e45a5799deea8b 100644 --- a/docs/IE_PLUGIN_DG/AsyncInferRequest.md +++ b/docs/IE_PLUGIN_DG/AsyncInferRequest.md @@ -1,49 +1,45 @@ -# Asynchronous Inference Request {#openvino_docs_ie_plugin_dg_async_infer_request} +# Asynchronous Inference Request {#openvino_docs_ov_plugin_dg_async_infer_request} Asynchronous Inference Request runs an inference pipeline asynchronously in one or several task executors depending on a device pipeline structure. -OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class: +OpenVINO Runtime Plugin API provides the base ov::IAsyncInferRequest class: -- The class has the `_pipeline` field of `std::vector >`, which contains pairs of an executor and executed task. +- The class has the `m_pipeline` field of `std::vector, ov::threading::Task> >`, which contains pairs of an executor and executed task. - All executors are passed as arguments to a class constructor and they are in the running state and ready to run tasks. -- The class has the InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait method, which waits for `_pipeline` to finish in a class destructor. The method does not stop task executors and they are still in the running stage, because they belong to the executable network instance and are not destroyed. +- The class has the ov::IAsyncInferRequest::stop_and_wait method, which waits for `m_pipeline` to finish in a class destructor. The method does not stop task executors and they are still in the running stage, because they belong to the compiled model instance and are not destroyed. -`AsyncInferRequest` Class +AsyncInferRequest Class ------------------------ -OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class for a custom asynchronous inference request implementation: +OpenVINO Runtime Plugin API provides the base ov::IAsyncInferRequest class for a custom asynchronous inference request implementation: @snippet src/async_infer_request.hpp async_infer_request:header -#### Class Fields +### Class Fields -- `_inferRequest` - a reference to the [synchronous inference request](@ref openvino_docs_ov_plugin_dg_infer_request) implementation. Its methods are reused in the `AsyncInferRequest` constructor to define a device pipeline. -- `_waitExecutor` - a task executor that waits for a response from a device about device tasks completion +- `m_wait_executor` - a task executor that waits for a response from a device about device tasks completion -> **NOTE**: If a plugin can work with several instances of a device, `_waitExecutor` must be device-specific. Otherwise, having a single task executor for several devices does not allow them to work in parallel. +> **NOTE**: If a plugin can work with several instances of a device, `m_wait_executor` must be device-specific. Otherwise, having a single task executor for several devices does not allow them to work in parallel. 
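For illustration, a derived request might populate `m_pipeline` along the lines of the sketch below. This is schematic only and is not the template plugin code behind the @snippet markers; the stage names follow the constructor description in the next section, and the choice of executor for the postprocess stage is an assumption based on it being described as a CPU compute task:

    // Schematic: m_pipeline is an ordered list of {executor, task} pairs;
    // each task is scheduled on its executor after the previous stage completes.
    m_pipeline = {
        {m_request_executor, [this] { /* prepare inputs, submit work to the device */ }},
        {m_wait_executor,    [this] { /* wait for the device to finish */ }},
        {m_request_executor, [this] { /* postprocess outputs */ }}  // assumed executor
    };
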
-### `AsyncInferRequest()`
+### AsyncInferRequest()
-The main goal of the `AsyncInferRequest` constructor is to define a device pipeline `_pipeline`. The example below demonstrates `_pipeline` creation with the following stages:
+The main goal of the `AsyncInferRequest` constructor is to define a device pipeline `m_pipeline`. The example below demonstrates `m_pipeline` creation with the following stages:
-- `inferPreprocess` is a CPU compute task.
-- `startPipeline` is a CPU ligthweight task to submit tasks to a remote device.
-- `waitPipeline` is a CPU non-compute task that waits for a response from a remote device.
-- `inferPostprocess` is a CPU compute task.
+- `infer_preprocess_and_start_pipeline` is a CPU lightweight task to submit tasks to a remote device.
+- `wait_pipeline` is a CPU non-compute task that waits for a response from a remote device.
+- `infer_postprocess` is a CPU compute task.
@snippet src/async_infer_request.cpp async_infer_request:ctor
The stages are distributed among two task executors in the following way:
-- `inferPreprocess` and `startPipeline` are combined into a single task and run on `_requestExecutor`, which computes CPU tasks.
+- `infer_preprocess_and_start_pipeline` prepares input tensors and runs on `m_request_executor`, which computes CPU tasks.
- You need at least two executors to overlap compute tasks of a CPU and a remote device the plugin works with. Otherwise, CPU and device tasks are executed serially one by one.
-- `waitPipeline` is sent to `_waitExecutor`, which works with the device.
+- `wait_pipeline` is sent to `m_wait_executor`, which works with the device.
-> **NOTE**: `callbackExecutor` is also passed to the constructor and it is used in the base InferenceEngine::AsyncInferRequestThreadSafeDefault class, which adds a pair of `callbackExecutor` and a callback function set by the user to the end of the pipeline.
+> **NOTE**: `m_callback_executor` is also passed to the constructor and it is used in the base ov::IAsyncInferRequest class, which adds a pair of `callback_executor` and a callback function set by the user to the end of the pipeline.
-Inference request stages are also profiled using IE_PROFILING_AUTO_SCOPE, which shows how pipelines of multiple asynchronous inference requests are run in parallel via the [Intel® VTune™ Profiler](https://software.intel.com/en-us/vtune) tool.
-### `~AsyncInferRequest()`
+### ~AsyncInferRequest()
-In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait method of the base class.
+In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the ov::IAsyncInferRequest::stop_and_wait method of the base class.
@snippet src/async_infer_request.cpp async_infer_request:dtor
diff --git a/docs/IE_PLUGIN_DG/CompiledModel.md b/docs/IE_PLUGIN_DG/CompiledModel.md
index 45d990101d132a..fa80ee81cb75ab 100644
--- a/docs/IE_PLUGIN_DG/CompiledModel.md
+++ b/docs/IE_PLUGIN_DG/CompiledModel.md
@@ -54,7 +54,7 @@ The method creates a synchronous inference request and returns it.
While the public OpenVINO API has a single interface for inference request, which can be executed in synchronous and asynchronous modes, a plugin library implementation has two separate classes:
- [Synchronous inference request](@ref openvino_docs_ov_plugin_dg_infer_request), which defines pipeline stages and runs them synchronously in the `infer` method.
-- [Asynchronous inference request](@ref openvino_docs_ie_plugin_dg_async_infer_request), which is a wrapper for a synchronous inference request and can run a pipeline asynchronously. Depending on a device pipeline structure, it can has one or several stages:
+- [Asynchronous inference request](@ref openvino_docs_ov_plugin_dg_async_infer_request), which is a wrapper for a synchronous inference request and can run a pipeline asynchronously. Depending on a device pipeline structure, it can have one or several stages:
- For single-stage pipelines, there is no need to define this method and create a class derived from ov::IAsyncInferRequest. For single stage pipelines, a default implementation of this method creates ov::IAsyncInferRequest wrapping a synchronous inference request and runs it asynchronously in the `m_request_executor` executor.
- For pipelines with multiple stages, such as performing some preprocessing on host, uploading input data to a device, running inference on a device, or downloading and postprocessing output data, schedule stages on several task executors to achieve better device use and performance. You can do it by creating a sufficient number of inference requests running in parallel. In this case, device stages of different inference requests are overlapped with preprocessing and postprocessing stage giving better performance.
> **IMPORTANT**: It is up to you to decide how many task executors you need to optimally execute a device pipeline.
diff --git a/docs/IE_PLUGIN_DG/InferRequest.md b/docs/IE_PLUGIN_DG/InferRequest.md
index bd08278e90cd0c..b40f23221fe90f 100644
--- a/docs/IE_PLUGIN_DG/InferRequest.md
+++ b/docs/IE_PLUGIN_DG/InferRequest.md
@@ -2,7 +2,7 @@
`InferRequest` class functionality:
- Allocate input and output tensors needed for a backend-dependent network inference.
-- Define functions for inference process stages (for example, `preprocess`, `upload`, `infer`, `download`, `postprocess`). These functions can later be used to define an execution pipeline during [Asynchronous Inference Request](@ref openvino_docs_ie_plugin_dg_async_infer_request) implementation.
+- Define functions for inference process stages (for example, `preprocess`, `upload`, `infer`, `download`, `postprocess`). These functions can later be used to define an execution pipeline during [Asynchronous Inference Request](@ref openvino_docs_ov_plugin_dg_async_infer_request) implementation.
- Call inference stages one by one synchronously.
InferRequest Class
@@ -81,4 +81,4 @@ The method returns the profiling info which was measured during pipeline stages
@snippet src/sync_infer_request.cpp infer_request:get_profiling_info
-The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ie_plugin_dg_async_infer_request) class.
+The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ov_plugin_dg_async_infer_request) class.
diff --git a/docs/IE_PLUGIN_DG/Intro.md b/docs/IE_PLUGIN_DG/Intro.md
index d1a6a12e104036..ed3d101ea4a6bc 100644
--- a/docs/IE_PLUGIN_DG/Intro.md
+++ b/docs/IE_PLUGIN_DG/Intro.md
@@ -10,7 +10,7 @@
Implement Plugin Functionality
Implement Compiled Model Functionality
Implement Synchronous Inference Request
- Implement Asynchronous Inference Request
+ Implement Asynchronous Inference Request
Implement Remote Context
Implement Remote Tensor
openvino_docs_ov_plugin_dg_plugin_build
@@ -43,7 +43,7 @@ OpenVINO plugin dynamic library consists of several main components:
3.
[Inference Request class](@ref openvino_docs_ov_plugin_dg_infer_request): - Runs an inference pipeline serially. - Can extract performance counters for an inference pipeline execution profiling. -4. [Asynchronous Inference Request class](@ref openvino_docs_ie_plugin_dg_async_infer_request): +4. [Asynchronous Inference Request class](@ref openvino_docs_ov_plugin_dg_async_infer_request): - Wraps the [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) class and runs pipeline stages in parallel on several task executors based on a device-specific pipeline structure. 5. [Remote Context](@ref openvino_docs_ov_plugin_dg_remote_context): - Provides the device specific remote context. Context allows to create remote tensors. @@ -61,7 +61,7 @@ Detailed guides * [Build](@ref openvino_docs_ov_plugin_dg_plugin_build) a plugin library using CMake * Plugin and its components [testing](@ref openvino_docs_ov_plugin_dg_plugin_testing) -* [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks) +* [Quantized networks](@ref openvino_docs_ov_plugin_dg_quantized_models) * [Low precision transformations](@ref openvino_docs_OV_UG_lpt) guide * [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index 2fa02a009e8056..96326fabcb574a 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -85,7 +85,7 @@ Actual model compilation is done in the `CompiledModel` constructor. Refer to th The function accepts a const shared pointer to `ov::Model` object and applies common and device-specific transformations on a copied model to make it more friendly to hardware operations. For details how to write custom device-specific transformation, please, refer to [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide. See detailed topics about model representation: * [Intermediate Representation and Operation Sets](@ref openvino_docs_MO_DG_IR_and_opsets) - * [Quantized models](@ref openvino_docs_ie_plugin_dg_quantized_networks). + * [Quantized models](@ref openvino_docs_ov_plugin_dg_quantized_models). @snippet template/src/plugin.cpp plugin:transform_model diff --git a/docs/IE_PLUGIN_DG/PluginTesting.md b/docs/IE_PLUGIN_DG/PluginTesting.md index 6a0cecfb05a1c4..ca19d5ea2fbbae 100644 --- a/docs/IE_PLUGIN_DG/PluginTesting.md +++ b/docs/IE_PLUGIN_DG/PluginTesting.md @@ -8,7 +8,7 @@ OpenVINO Plugin tests are included in the `openvino::funcSharedTests` CMake targ Test definitions are split into tests class declaration (see `src/tests/functional/plugin/shared/include`) and tests class implementation (see `src/tests/functional/plugin/shared/src`) and include the following scopes of plugin conformance tests: -1. **Behavior tests** (`behavior` sub-folder), which are a separate test group to check that a plugin satisfies basic OpenVINO concepts: plugin creation, multiple executable networks support, multiple synchronous and asynchronous inference requests support, and so on. See the next section with details how to instantiate the tests definition class with plugin-specific parameters. +1. **Behavior tests** (`behavior` sub-folder), which are a separate test group to check that a plugin satisfies basic OpenVINO concepts: plugin creation, multiple compiled models support, multiple synchronous and asynchronous inference requests support, and so on. See the next section with details how to instantiate the tests definition class with plugin-specific parameters. 2. 
**Single layer tests** (`single_layer_tests` sub-folder). This group of tests checks that a particular single layer can be inferenced on a device. An example of test instantiation based on test definition from `openvino::funcSharedTests` library:
diff --git a/docs/IE_PLUGIN_DG/QuantizedNetworks.md b/docs/IE_PLUGIN_DG/QuantizedNetworks.md
index 57deb94281de05..f3c712e2f618b5 100644
--- a/docs/IE_PLUGIN_DG/QuantizedNetworks.md
+++ b/docs/IE_PLUGIN_DG/QuantizedNetworks.md
@@ -1,8 +1,8 @@
-# Quantized networks compute and restrictions {#openvino_docs_ie_plugin_dg_quantized_networks}
+# Quantized models compute and restrictions {#openvino_docs_ov_plugin_dg_quantized_models}
-One of the feature of Inference Engine is the support of quantized networks with different precisions: INT8, INT4, etc.
+One of the features of OpenVINO is the support of quantized models with different precisions: INT8, INT4, etc.
However, it is up to the plugin to define what exact precisions are supported by the particular HW.
-All quantized networks which can be expressed in IR have a unified representation by means of *FakeQuantize* operation.
+All quantized models which can be expressed in IR have a unified representation by means of *FakeQuantize* operation.
For more details about low-precision model representation please refer to this [document](@ref openvino_docs_ie_plugin_dg_lp_representation).
### Interpreting FakeQuantize at runtime
@@ -44,6 +44,6 @@ Below we define these rules as follows:
- Per-channel quantization of activations for channel-wise and element-wise operations, e.g. Depthwise Convolution, Eltwise Add/Mul, ScaleShift.
- Symmetric and asymmetric quantization of weights and activations with the support of per-channel scales and zero-points.
- Non-unified quantization parameters for Eltwise and Concat operations.
-- Non-quantized network output, i.e. there are no quantization parameters for it.
+- Non-quantized model output, i.e. there are no quantization parameters for it.
[qdq_propagation]: images/qdq_propagation.png diff --git a/docs/IE_PLUGIN_DG/detailed_guides.md b/docs/IE_PLUGIN_DG/detailed_guides.md index 934c53cc1e2ca5..2076afb0d94447 100644 --- a/docs/IE_PLUGIN_DG/detailed_guides.md +++ b/docs/IE_PLUGIN_DG/detailed_guides.md @@ -6,13 +6,13 @@ :maxdepth: 1 :hidden: - openvino_docs_ie_plugin_dg_quantized_networks + openvino_docs_ov_plugin_dg_quantized_models openvino_docs_OV_UG_lpt @endsphinxdirective The guides below provide extra information about specific features of OpenVINO that are needed during OpenVINO plugin development: -* [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks) +* [Quantized networks](@ref openvino_docs_ov_plugin_dg_quantized_models) * [Low precision transformations](@ref openvino_docs_OV_UG_lpt) guide * [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide From 82a992b95d2264b8c26c5d3af6feb1d877bcf3ac Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 21 Mar 2023 12:31:10 +0400 Subject: [PATCH 005/296] [TF FE] Fix leftovers from code review (#16422) Signed-off-by: Kazantsev, Roman --- src/frontends/tensorflow_common/src/op/strided_slice.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/frontends/tensorflow_common/src/op/strided_slice.cpp b/src/frontends/tensorflow_common/src/op/strided_slice.cpp index e71097557e7e44..50ceb763a80658 100644 --- a/src/frontends/tensorflow_common/src/op/strided_slice.cpp +++ b/src/frontends/tensorflow_common/src/op/strided_slice.cpp @@ -27,9 +27,10 @@ OutputVector translate_strided_slice_op(const NodeContext& node) { return vector{}; } size_t max_length = sizeof(mask) * CHAR_BIT; - vector vec{}; + vector vec; + vec.reserve(max_length); for (size_t i = 0; i < max_length; ++i) { - if ((mask >> i & 0x1) == 1) { + if (((mask >> i) & 0x1) == 1) { // resize the vector by appending the required number of zeros vec.resize(i + 1, 0); vec[i] = 1; From 63797db257f1a1130a59e17470a04aa49ad85114 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 21 Mar 2023 10:02:37 +0100 Subject: [PATCH 006/296] Review ROIPooling class for shape inference aspects (#16403) * Review ROIPooling class - check interval shape and label propagation - add template shape_infer - add shape infer into cpu plugin - add test with StaticShape * Use get_output_roi instead of get_output_size * Add missing includes --- src/core/include/openvino/op/roi_pooling.hpp | 20 +- .../include/roi_pooling_shape_inference.hpp | 107 +++++++++ src/core/src/op/roi_pooling.cpp | 108 +++------ src/core/tests/type_prop/roi_pooling.cpp | 218 +++++++++++------- src/core/tests/visitors/op/roi_pooling.cpp | 2 +- .../intel_cpu/src/nodes/roi_pooling.cpp | 6 +- .../utils/shape_inference/shape_inference.cpp | 7 +- .../roi_pooling_shape_inference_test.cpp | 74 ++++++ 8 files changed, 385 insertions(+), 157 deletions(-) create mode 100644 src/core/shape_inference/include/roi_pooling_shape_inference.hpp create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp diff --git a/src/core/include/openvino/op/roi_pooling.hpp b/src/core/include/openvino/op/roi_pooling.hpp index b0b04648d7b3ea..57799954a7641f 100644 --- a/src/core/include/openvino/op/roi_pooling.hpp +++ b/src/core/include/openvino/op/roi_pooling.hpp @@ -34,12 +34,30 @@ class OPENVINO_API ROIPooling : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + /// \brief Set the output ROI feature map (pooled_h, pooled_w).
+ /// \param output_size Shape with pooling attributes pooled_h and pooled_w sizes. + void set_output_roi(Shape output_size); + + /// \brief Get the output ROI feature map shape (H x W) + /// \return Shape with pooled_h and pooled_w attributes. + const Shape& get_output_roi() const; + + OPENVINO_DEPRECATED("Use 'get_output_roi' instead. Use of this member can be ambiguous with Node base " + "'get_output_size' which returns the number of outputs.") const Shape& get_output_size() const { return m_output_size; } + + /// \brief Set the spatial scale value. + /// \param scale Scale value to set. + void set_spatial_scale(float scale); float get_spatial_scale() const { return m_spatial_scale; } + + /// \brief Set the method of pooling + /// \param method_name Pooling method name. + void set_method(std::string method_name); const std::string& get_method() const { return m_method; } @@ -47,7 +65,7 @@ class OPENVINO_API ROIPooling : public Op { private: Shape m_output_size{0, 0}; - float m_spatial_scale{0}; + float m_spatial_scale{0.0f}; std::string m_method = "max"; }; } // namespace v0 diff --git a/src/core/shape_inference/include/roi_pooling_shape_inference.hpp b/src/core/shape_inference/include/roi_pooling_shape_inference.hpp new file mode 100644 index 00000000000000..1568ce3cbe960c --- /dev/null +++ b/src/core/shape_inference/include/roi_pooling_shape_inference.hpp @@ -0,0 +1,107 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "compare.hpp" +#include "dimension_util.hpp" +#include "openvino/op/roi_pooling.hpp" + +namespace ov { +namespace op { +namespace pooling { +namespace validate { +template +void rois_input_shape(const TROIPooling* op, const TShape rois_shape) { + if (rois_shape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + rois_shape.size() == 2, + "Expected a 2D tensor for the ROIs input with box coordinates. Got: ", + rois_shape); + + NODE_VALIDATION_CHECK(op, + rois_shape[1].compatible(5), + "The second dimension of ROIs input should contain batch id and box coordinates. ", + "This dimension is expected to be equal to 5. Got: ", + rois_shape[1]); + } +} + +template +void output_roi_attr(const TROIPooling* op) { + const auto& out_roi = op->get_output_roi(); + + NODE_VALIDATION_CHECK(op, + out_roi.size() == 2, + "The dimension of pooled size is expected to be equal to 2. Got: ", + out_roi.size()); + + NODE_VALIDATION_CHECK(op, + std::none_of(out_roi.cbegin(), out_roi.cend(), cmp::Less(1)), + "Pooled size attributes pooled_h and pooled_w should be positive integers. Got: ", + out_roi[0], + " and: ", + out_roi[1], + " respectively"); +} + +template +void scale_attr(const TROIPooling* op) { + const auto scale = op->get_spatial_scale(); + NODE_VALIDATION_CHECK(op, + std::isnormal(scale) && !std::signbit(scale), + "The spatial scale attribute should be a positive floating point number. Got: ", + scale); +} + +template +void method_attr(const TROIPooling* op) { + const auto& method = op->get_method(); + NODE_VALIDATION_CHECK(op, + method == "max" || method == "bilinear", + "Pooling method attribute should be either \'max\' or \'bilinear\'.
Got: ", + method); +} +} // namespace validate +} // namespace pooling + +namespace v0 { +template +std::vector shape_infer(const ROIPooling* op, const std::vector& input_shapes) { + NODE_VALIDATION_CHECK(op, input_shapes.size() == 2); + using namespace ov::util; + + const auto& feat_shape = input_shapes[0]; + const auto& rois_shape = input_shapes[1]; + const auto& feat_rank = feat_shape.rank(); + + NODE_VALIDATION_CHECK(op, + feat_rank.compatible(4), + "Expected a 4D tensor for the feature maps input. Got: ", + feat_shape); + + pooling::validate::rois_input_shape(op, rois_shape); + pooling::validate::output_roi_attr(op); + pooling::validate::scale_attr(op); + pooling::validate::method_attr(op); + + TShape out_shape; + out_shape.reserve(4); + + out_shape.emplace_back(rois_shape.rank().is_static() ? rois_shape[0] : dim::inf_bound); + out_shape.emplace_back(feat_rank.is_static() ? feat_shape[1] : dim::inf_bound); + std::copy(op->get_output_roi().cbegin(), op->get_output_roi().cend(), std::back_inserter(out_shape)); + + return {out_shape}; +} + +template +void shape_infer(const ROIPooling* op, const std::vector& input_shapes, std::vector& output_shapes) { + output_shapes = shape_infer(op, input_shapes); +} +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/roi_pooling.cpp b/src/core/src/op/roi_pooling.cpp index d0baa803933db5..00ee8dacf46447 100644 --- a/src/core/src/op/roi_pooling.cpp +++ b/src/core/src/op/roi_pooling.cpp @@ -2,18 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/roi_pooling.hpp" +#include "openvino/op/roi_pooling.hpp" #include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "roi_pooling_shape_inference.hpp" using namespace std; -using namespace ngraph; -op::ROIPooling::ROIPooling(const Output& input, - const Output& coords, - const ov::Shape& output_size, - const float spatial_scale, - const string& method) +namespace ov { +namespace op { +namespace v0 { +ROIPooling::ROIPooling(const Output& input, + const Output& coords, + const ov::Shape& output_size, + const float spatial_scale, + const string& method) : Op({input, coords}), m_output_size(output_size), m_spatial_scale(spatial_scale), @@ -21,10 +25,10 @@ op::ROIPooling::ROIPooling(const Output& input, constructor_validate_and_infer_types(); } -void op::ROIPooling::validate_and_infer_types() { +void ROIPooling::validate_and_infer_types() { OV_OP_SCOPE(v0_ROIPooling_validate_and_infer_types); - auto feat_maps_et = get_input_element_type(0); - auto coords_et = get_input_element_type(1); + const auto& feat_maps_et = get_input_element_type(0); + const auto& coords_et = get_input_element_type(1); NODE_VALIDATION_CHECK(this, feat_maps_et.is_real() && coords_et.is_real(), "The data type for input and ROIs is expected to be a floating point type. Got: ", @@ -34,72 +38,16 @@ void op::ROIPooling::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, feat_maps_et == coords_et, - "Type of feature maps (inputs) and rois is expected to be the same. Got: ", + "Type of feature maps (inputs) and ROIs is expected to be the same. Got: ", feat_maps_et, " and: ", coords_et); - NODE_VALIDATION_CHECK(this, - m_output_size.size() == 2, - "The dimension of pooled size is expected to be equal to 2. Got: ", - m_output_size.size()); - - NODE_VALIDATION_CHECK(this, - m_output_size[0] > 0 && m_output_size[1] > 0, - "Pooled size attributes pooled_h and pooled_w should should be " - "non-negative integers. 
Got: ", - m_output_size[0], - " and: ", - m_output_size[1], - "respectively"); - - NODE_VALIDATION_CHECK(this, - m_spatial_scale > 0, - "The spatial scale attribute should be a positive floating point number. Got: ", - m_spatial_scale); - - NODE_VALIDATION_CHECK(this, - m_method == "max" || m_method == "bilinear", - "Pooling method attribute should be either \'max\' or \'bilinear\'. Got: ", - m_method); + const auto output_shapes = shape_infer(this, get_node_input_partial_shapes(*this)); + set_output_type(0, feat_maps_et, output_shapes[0]); const auto& feat_maps_ps = get_input_partial_shape(0); - NODE_VALIDATION_CHECK(this, - feat_maps_ps.rank().compatible(4), - "Expected a 4D tensor for the feature maps input. Got: ", - feat_maps_ps); - const auto& coords_ps = get_input_partial_shape(1); - NODE_VALIDATION_CHECK(this, - coords_ps.rank().compatible(2), - "Expected a 2D tensor for the ROIs input with box coordinates. Got: ", - coords_ps); - - if (coords_ps.rank().is_static()) { - const auto coords_second_dim = coords_ps[1]; - NODE_VALIDATION_CHECK(this, - coords_second_dim.compatible(5), - "The second dimension of ROIs input should contain batch id and box coordinates. ", - "This dimension is expected to be equal to 5. Got: ", - coords_second_dim); - } - - // output shape should be {NUM_ROIS, C, pooled_h, pooled_w} - auto output_shape = ov::PartialShape{{Dimension::dynamic(), - Dimension::dynamic(), - Dimension{static_cast(m_output_size[0])}, - Dimension{static_cast(m_output_size[1])}}}; - - if (coords_ps.rank().is_static()) { - output_shape[0] = coords_ps[0]; - } - - if (feat_maps_ps.rank().is_static()) { - output_shape[1] = feat_maps_ps[1]; - } - - set_output_size(1); - set_output_type(0, feat_maps_et, output_shape); // if channel dimension, C, not known // feature maps input is used by shape specialization pass @@ -114,13 +62,13 @@ void op::ROIPooling::validate_and_infer_types() { } } -shared_ptr op::ROIPooling::clone_with_new_inputs(const OutputVector& new_args) const { +shared_ptr ROIPooling::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_ROIPooling_clone_with_new_inputs); check_new_args_count(this, new_args); return make_shared(new_args.at(0), new_args.at(1), m_output_size, m_spatial_scale, m_method); } -bool op::ROIPooling::visit_attributes(AttributeVisitor& visitor) { +bool ROIPooling::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_ROIPooling_visit_attributes); visitor.on_attribute("output_size", m_output_size); visitor.on_attribute("pooled_h", m_output_size[0]); @@ -129,3 +77,21 @@ bool op::ROIPooling::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("method", m_method); return true; } + +void ROIPooling::set_output_roi(Shape output_size) { + m_output_size = std::move(output_size); +} +const Shape& ROIPooling::get_output_roi() const { + return m_output_size; +} + +void ROIPooling::set_spatial_scale(float scale) { + m_spatial_scale = scale; +} + +void ROIPooling::set_method(std::string method_name) { + m_method = std::move(method_name); +} +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/tests/type_prop/roi_pooling.cpp b/src/core/tests/type_prop/roi_pooling.cpp index 0fa337a37ea5be..e86b52eef52641 100644 --- a/src/core/tests/type_prop/roi_pooling.cpp +++ b/src/core/tests/type_prop/roi_pooling.cpp @@ -2,109 +2,171 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "common_test_utils/test_assertions.hpp" #include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" +#include 
"openvino/opsets/opset11.hpp" +#include "type_prop.hpp" using namespace std; -using namespace ngraph; - -TEST(type_prop, roi_pooling_basic_shape_inference) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_EQ(op->get_method(), "max"); - ASSERT_EQ(op->get_shape(), (Shape{4, 3, 2, 2})); +using namespace ov; +using namespace ov::opset11; +using namespace testing; + +class TypePropROIPoolingV0 : public TypePropOpTest { +protected: + float spatial_scale = 0.625f; + Shape pooling_roi_2x2{2, 2}; +}; + +TEST_F(TypePropROIPoolingV0, default_ctor) { + const auto feat_maps = make_shared(element::f32, PartialShape{{0, 3}, {1, 3}, {1, 6}, {1, 6}}); + const auto rois = make_shared(element::f32, PartialShape{{2, 4}, {1, 5}}); + + const auto op = make_op(); + op->set_arguments(OutputVector{feat_maps, rois}); + op->set_spatial_scale(spatial_scale); + op->set_method("max"); + op->set_output_roi({3, 4}); + op->validate_and_infer_types(); + + EXPECT_FLOAT_EQ(op->get_spatial_scale(), spatial_scale); + EXPECT_EQ(op->get_output_roi(), Shape({3, 4})); + EXPECT_EQ(op->get_method(), "max"); + EXPECT_EQ(op->get_input_size(), 2); + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(static_cast(op.get())->get_output_size(), 1); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{{2, 4}, {1, 3}, 3, 4})); } -TEST(type_prop, roi_pooling_dynamic_channels_dim) { - const auto feat_maps = make_shared(element::f32, PartialShape{1, Dimension(), 6, 6}); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{4, Dimension(), 2, 2})); +TEST_F(TypePropROIPoolingV0, basic_shape_inference) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); + const auto rois = make_shared(element::f32, Shape{4, 5}); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, 0.625f); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_method(), "max"); + EXPECT_EQ(op->get_shape(), (Shape{4, 3, 2, 2})); } -TEST(type_prop, roi_pooling_dynamic_num_rois_dim) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, PartialShape{Dimension(), 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{Dimension(), 3, 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_channels_dim) { + auto feat_shape = PartialShape{1, -1, 6, 6}; + auto rois_shape = PartialShape{4, 5}; + set_shape_labels(feat_shape, 10); + set_shape_labels(rois_shape, 20); + + const auto feat_maps = make_shared(element::f32, feat_shape); + const auto rois = make_shared(element::f32, rois_shape); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{4, -1, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, ov::no_label, ov::no_label)); } -TEST(type_prop, roi_pooling_dynamic_rank_feat_maps) { - const auto feat_maps = make_shared(element::f32, PartialShape::dynamic()); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - 
ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{4, Dimension(), 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_num_rois_dim) { + auto feat_shape = PartialShape{1, 3, 6, 6}; + auto rois_shape = PartialShape{-1, 5}; + set_shape_labels(feat_shape, 10); + set_shape_labels(rois_shape, 20); + + const auto feat_maps = make_shared(element::f64, feat_shape); + const auto rois = make_shared(element::f64, rois_shape); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "bilinear"); + + EXPECT_EQ(op->get_element_type(), element::f64); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{-1, 3, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, ov::no_label, ov::no_label)); } -TEST(type_prop, roi_pooling_dynamic_rank_rois) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, PartialShape::dynamic()); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{Dimension(), 3, 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_rank_feat_maps) { + const auto feat_maps = make_shared(element::f16, PartialShape::dynamic()); + const auto rois = make_shared(element::f16, Shape{4, 5}); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale); + + EXPECT_EQ(op->get_element_type(), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{4, -1, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); } -TEST(type_prop, roi_pooling_incompatible_input_rank) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // feat_maps must be of rank 4 - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, dynamic_rank_feat_rois) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape::dynamic()); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{-1, 3, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); } -TEST(type_prop, roi_pooling_incompatible_pooling_shape) { - Shape pool_shape{2, 2, 2}; - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // pool_shape must be of rank 2 {pooled_h, pooled_w} - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, pool_shape, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_input_rank) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("Expected a 4D tensor for the feature maps input")); } -TEST(type_prop, roi_pooling_incompatible_rois_second_dim) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 4}); - // the second dim of rois must be 5. 
[batch_id, x_1, y_1, x_2, y_2] - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_pooling_shape) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, Shape{2, 2, 2}, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The dimension of pooled size is expected to be equal to 2")); +} + +TEST_F(TypePropROIPoolingV0, incompatible_rois_second_dim) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 4}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The second dimension of ROIs input should contain batch id and box coordinates. This " + "dimension is expected to be equal to 5")); } -TEST(type_prop, roi_pooling_incompatible_feature_maps_element_type) { - const auto feat_maps = make_shared(element::i32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // feat_maps element type must be floating point type - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_feature_maps_element_type) { + const auto feat_maps = make_shared(element::i32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The data type for input and ROIs is expected to be a floating point type")); } -TEST(type_prop, roi_pooling_incompatible_rois_element_type) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // rois element type must be equal to feat_maps element type (floating point type) - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "bilinear"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_rois_element_type) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::i16, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "bilinear"), + NodeValidationFailure, + HasSubstr("The data type for input and ROIs is expected to be a floating point type")); } -TEST(type_prop, roi_pooling_invalid_pooling_method) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling method is invalid: not max nor bilinear - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "invalid"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_pooling_method) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "invalid"), + NodeValidationFailure, + HasSubstr("Pooling method attribute should be either \'max\' or \'bilinear\'")); } -TEST(type_prop, roi_pooling_invalid_spatial_scale) { - const auto feat_maps = 
make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling spatial scale attribute must be a positive floating point number - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, -0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_spatial_scale) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, -1.0f), + NodeValidationFailure, + HasSubstr("The spatial scale attribute should be a positive floating point number")); } -TEST(type_prop, roi_pooling_invalid_pooled_size) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling pooled_h and pooled_w must be non-negative integers - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{1, 0}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_pooled_size) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, Shape{1, 0}, spatial_scale), + NodeValidationFailure, + HasSubstr("Pooled size attributes pooled_h and pooled_w should be positive integers")); } diff --git a/src/core/tests/visitors/op/roi_pooling.cpp b/src/core/tests/visitors/op/roi_pooling.cpp index 8438a797728eb1..a5b49fe9cca3d2 100644 --- a/src/core/tests/visitors/op/roi_pooling.cpp +++ b/src/core/tests/visitors/op/roi_pooling.cpp @@ -25,7 +25,7 @@ TEST(attributes, roi_pooling_op) { NodeBuilder builder(op, {data, coords}); const auto g_op = ov::as_type_ptr(builder.create()); - EXPECT_EQ(g_op->get_output_size(), op->get_output_size()); + EXPECT_EQ(g_op->get_output_roi(), op->get_output_roi()); EXPECT_EQ(g_op->get_spatial_scale(), op->get_spatial_scale()); EXPECT_EQ(g_op->get_method(), op->get_method()); } diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index 3f25a167134715..496307ede5bb6b 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -393,10 +393,10 @@ ROIPooling::ROIPooling(const std::shared_ptr& op, const GraphConte std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' "; auto roiPooling = ngraph::as_type_ptr(op); - refParams.pooled_h = roiPooling->get_output_size()[0]; - refParams.pooled_w = roiPooling->get_output_size()[1]; + refParams.pooled_h = roiPooling->get_output_roi()[0]; + refParams.pooled_w = roiPooling->get_output_roi()[1]; refParams.spatial_scale = roiPooling->get_spatial_scale(); - std::string m = roiPooling->get_method(); + const auto& m = roiPooling->get_method(); if (m == "max") { algorithm = Algorithm::ROIPoolingMax; } else if (m == "bilinear") { diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp index f27dbf76453f5e..1961157ae93ddc 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp @@ -62,6 +62,7 @@ #include "reverse_sequence_shape_inference.hpp" #include "reverse_shape_inference.hpp" #include "roi_align_shape_inference.hpp" +#include
"roi_pooling_shape_inference.hpp" #include "roll_shape_inference.hpp" #include "scatter_elements_update_shape_inference.hpp" #include "scatter_nd_base_shape_inference.hpp" @@ -125,9 +126,8 @@ class entryIO : public entryBase { IShapeInferCommon::Result infer(const std::vector& input_shapes, const std::map& constant_data) override { - auto op = static_cast(node.get()); - std::vector output_shapes(op->get_output_size()); - shape_infer(op, input_shapes, output_shapes); + std::vector output_shapes(node->get_output_size()); + shape_infer(static_cast(node.get()), input_shapes, output_shapes); return {std::move(output_shapes), ShapeInferStatus::success}; } }; @@ -597,6 +597,7 @@ const IShapeInferCommonFactory::TRegistry IShapeInferCommonFactory::registry{ _OV_OP_SHAPE_INFER_REG(Reshape, entryIOC), _OV_OP_SHAPE_INFER_REG(ReverseSequence, entryIO), _OV_OP_SHAPE_INFER_REG(ROIAlign, entryIO), + _OV_OP_SHAPE_INFER_REG(ROIPooling, entryIO), _OV_OP_SHAPE_INFER_REG(Roll, entryIOC), _OV_OP_SHAPE_INFER_REG(ScatterElementsUpdate, entryIOC), _OV_OP_SHAPE_INFER_REG(ScatterNDUpdate, entryIO), diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp new file mode 100644 index 00000000000000..0999e278c95ae6 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp @@ -0,0 +1,74 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "openvino/opsets/opset11.hpp" +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; +using namespace testing; + +class ROIPoolingV0StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + output_shapes.resize(1); + } +}; + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, default_ctor) { + op = make_op(); + op->set_output_roi({3, 3}); + op->set_method("max"); + op->set_spatial_scale(0.34f); + + input_shapes = ShapeVector{{1, 5, 10, 10}, {2, 5}}; + auto shape_infer = make_shape_inference(op); + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({2, 5, 3, 3})); +} + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, inputs_dynamic_rank) { + const auto feat = std::make_shared(element::f64, PartialShape::dynamic()); + const auto rois = std::make_shared(element::f64, PartialShape::dynamic()); + + op = make_op(feat, rois, ov::Shape{5, 5}, 0.9f); + + input_shapes = ShapeVector{{2, 3, 100, 100}, {10, 5}}; + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({10, 3, 5, 5})); +} + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, inputs_static_rank) { + const auto feat = std::make_shared(element::f64, PartialShape::dynamic(4)); + const auto rois = std::make_shared(element::f64, PartialShape::dynamic(2)); + + op = make_op(feat, rois, ov::Shape{7, 5}, 1.9f, "max"); + + input_shapes = ShapeVector{{2, 3, 20, 100}, {10, 5}}; + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({10, 3, 7, 5})); +} + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, invalid_rois_batch_size) { + const auto feat = std::make_shared(element::f64, PartialShape::dynamic(4)); + const auto rois = 
std::make_shared(element::f64, PartialShape::dynamic()); + + op = make_op(feat, rois, ov::Shape{7, 5}, 1.9f, "max"); + + input_shapes = ShapeVector{{2, 3, 20, 100}, {10, 6}}; + + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + NodeValidationFailure, + HasSubstr("The second dimension of ROIs input should contain batch id and box coordinates. This " + "dimension is expected to be equal to 5")); +} From 7d56c75d65f0072ff9fc36c5e747ce35d15d031a Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 21 Mar 2023 10:28:58 +0100 Subject: [PATCH 007/296] Fix MO Reader for Squeeze without axes (#16398) * Fix MO Reader for Squeeze without axes * Fix style * Update tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py --- .../utils/ir_reader/internal_ops/squeeze.py | 19 ++++++++++--- .../mo/utils/ir_reader/layer_to_class_test.py | 28 +++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py b/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py index 67bfc80dea5e69..5e9702e30f8ea0 100644 --- a/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py +++ b/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py @@ -3,12 +3,23 @@ from openvino.tools.mo.graph.graph import Node from openvino.tools.mo.ops.squeeze import Squeeze +from openvino.tools.mo.front.common.partial_infer.utils import shape_array, is_fully_defined class SqueezeInternal(Squeeze): @staticmethod def infer(node: Node): - axis_value = node.in_port(1).data.get_value() - Squeeze.infer(node) - # preserve initial axis value - node.in_port(1).data.set_value(axis_value) + if node.is_in_port_connected(1): + axis_value = node.in_port(1).data.get_value() + Squeeze.infer(node) + # preserve initial axis value + node.in_port(1).data.set_value(axis_value) + else: + # Squeeze without axes provided + node_name = node.soft_get('name', node.id) + input_shape = node.in_port(0).data.get_shape() + assert is_fully_defined( + input_shape), 'Squeeze dimensions are not defined for op "{}"'.format(node_name) + output_shape = [s for s in shape_array(input_shape).tolist() if s != 1] + node.out_port(0).data.set_shape(shape_array(output_shape)) + diff --git a/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py b/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py index f86e4514ca7acb..8dd6a17aba63e2 100644 --- a/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py +++ b/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py @@ -166,6 +166,34 @@ def test_squeeze(self): (flag, resp) = compare_graphs(graph, graph_ref, 'result', check_op_attrs=True) self.assertTrue(flag, resp) + def test_squeeze_no_axes(self): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': [2, 1, 3], 'kind': 'data'}, + + 'squeeze': {'kind': 'op', 'type': 'Squeeze'}, + 'squeeze_data': {'shape': [2, 3], 'kind': 'data', 'value': None}, + + 'result': {'kind': 'op', 'type': 'Result'} + } + + edges = [('input', 'input_data'), + ('input_data', 'squeeze'), + ('squeeze', 'squeeze_data'), + ('squeeze_data', 'result'), + ] + + graph = build_graph(nodes_attributes, edges, nodes_with_edges_only=True) + + squeeze_node = Node(graph, 'squeeze') + SqueezeInternal.infer(squeeze_node) + + graph_ref = build_graph(nodes_attributes, edges, nodes_with_edges_only=True) + + # Check that graph wasn't changed after shape infer + (flag, resp) = compare_graphs(graph, graph_ref, 
'result', check_op_attrs=True) + self.assertTrue(flag, resp) + def test_unsqueeze(self): nodes_attributes = { 'input': {'kind': 'op', 'type': 'Parameter'}, From ec0a1e58d16777afa9c9aac598ad4cb938ff3d64 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 21 Mar 2023 13:34:37 +0400 Subject: [PATCH 008/296] Fixed some leftovers for 2.0 dev api (#16421) * Fixed some leftovers for 2.0 dev api * Fixed build issue --- src/inference/dev_api/ie_icore.hpp | 18 ------ .../openvino/runtime/device_id_parser.hpp | 36 +++++++++++ .../dev_api/openvino/runtime/iplugin.hpp | 14 ++++- src/inference/src/core.cpp | 5 +- src/inference/src/dev/core_impl.cpp | 30 +++++----- src/inference/src/dev/core_impl_ie.cpp | 13 ++-- src/inference/src/dev/device_id_parser.cpp | 60 ++++++++++--------- src/inference/src/ie_core.cpp | 9 +-- src/plugins/auto/plugin.cpp | 15 ++--- src/plugins/auto/utils/plugin_config.hpp | 5 +- src/plugins/auto_batch/src/auto_batch.cpp | 5 +- src/plugins/hetero/plugin.cpp | 16 ++--- src/plugins/intel_gpu/src/plugin/plugin.cpp | 3 +- .../src/base/layer_test_utils.cpp | 3 +- 14 files changed, 138 insertions(+), 94 deletions(-) diff --git a/src/inference/dev_api/ie_icore.hpp b/src/inference/dev_api/ie_icore.hpp index 32f08028d3f012..03c298af681f72 100644 --- a/src/inference/dev_api/ie_icore.hpp +++ b/src/inference/dev_api/ie_icore.hpp @@ -209,22 +209,4 @@ class ICore : public ov::ICore { virtual RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) = 0; }; -/** - * @private - */ -class INFERENCE_ENGINE_API_CLASS(DeviceIDParser) { - std::string deviceName; - std::string deviceID; - -public: - explicit DeviceIDParser(const std::string& deviceNameWithID); - - std::string getDeviceID() const; - std::string getDeviceName() const; - - static std::vector getHeteroDevices(std::string fallbackDevice); - static std::vector getMultiDevices(std::string devicesList); - static std::string getBatchDevice(std::string devicesList); -}; - } // namespace InferenceEngine diff --git a/src/inference/dev_api/openvino/runtime/device_id_parser.hpp b/src/inference/dev_api/openvino/runtime/device_id_parser.hpp new file mode 100644 index 00000000000000..ecd95c6a6a7859 --- /dev/null +++ b/src/inference/dev_api/openvino/runtime/device_id_parser.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief Provides a parser for device names + * @file openvino/runtime/device_id_parser.hpp + */ + +#pragma once + +#include + +#include "openvino/runtime/common.hpp" + +namespace ov { + +/** + * @brief Class that parses a device name and id + */ +class OPENVINO_RUNTIME_API DeviceIDParser { + std::string m_device_name; + std::string m_device_id; + +public: + explicit DeviceIDParser(const std::string& device_name_with_id); + + const std::string& get_device_id() const; + const std::string& get_device_name() const; + + static std::vector get_hetero_devices(const std::string& fallbackDevice); + static std::vector get_multi_devices(const std::string& devicesList); + static std::string get_batch_device(const std::string& devicesList); +}; + +} // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index 5d752ab5b15d08..a64073b3ebe0d1 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -20,6 +20,7 @@ #include "openvino/runtime/icore.hpp" #include
"openvino/runtime/iremote_context.hpp" #include "openvino/runtime/threading/executor_manager.hpp" +#include "openvino/util/pp.hpp" namespace InferenceEngine { @@ -256,7 +257,11 @@ OPENVINO_RUNTIME_API std::unordered_set get_supported_nodes( std::function&)> transform, std::function)> is_node_supported); -} // namespace ov +/** + * @private + */ +using CreatePluginFunc = void(std::shared_ptr<::ov::IPlugin>&); + /** * @def OV_CREATE_PLUGIN * @brief Defines a name of a function creating plugin instance @@ -266,6 +271,13 @@ OPENVINO_RUNTIME_API std::unordered_set get_supported_nodes( # define OV_CREATE_PLUGIN CreatePluginEngine #endif +/** + * @private + */ +constexpr static const auto create_plugin_function = OV_PP_TOSTRING(OV_CREATE_PLUGIN); + +} // namespace ov + /** * @def OV_DEFINE_PLUGIN_CREATE_FUNCTION(PluginType, version) * @brief Defines the exported `OV_CREATE_PLUGIN` function which is used to create a plugin instance diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp index 01454656e7f3ad..9da46ee74fae3e 100644 --- a/src/inference/src/core.cpp +++ b/src/inference/src/core.cpp @@ -9,6 +9,7 @@ #include "dev/converter_utils.hpp" #include "dev/core_impl.hpp" #include "ie_itt.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "so_extension.hpp" #ifdef OPENVINO_STATIC_LIBRARY @@ -252,8 +253,8 @@ void Core::register_plugin(const std::string& plugin, const std::string& device_ void Core::unload_plugin(const std::string& device_name) { OV_CORE_CALL_STATEMENT({ - ie::DeviceIDParser parser(device_name); - std::string devName = parser.getDeviceName(); + ov::DeviceIDParser parser(device_name); + std::string devName = parser.get_device_name(); _impl->unload_plugin(devName); }); diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 44fc79a4987f59..ed39bc67f1f94e 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -28,6 +28,7 @@ #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/core/version.hpp" #include "openvino/pass/manager.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/itensor.hpp" #include "openvino/runtime/remote_context.hpp" @@ -276,9 +277,9 @@ ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, const An updated_device_name = deviceName.substr(0, pos); parsed_device_priority = deviceName.substr(pos + 1); } else { - InferenceEngine::DeviceIDParser parser(deviceName); - updated_device_name = parser.getDeviceName(); - parsed_device_priority = parser.getDeviceID(); + ov::DeviceIDParser parser(deviceName); + updated_device_name = parser.get_device_name(); + parsed_device_priority = parser.get_device_id(); } // checks and updates device priority @@ -416,8 +417,7 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { } else { so = ov::util::load_shared_object(desc.libraryLocation.c_str()); std::shared_ptr plugin_impl; - reinterpret_cast( - ov::util::get_symbol(so, InferenceEngine::create_plugin_function))(plugin_impl); + reinterpret_cast(ov::util::get_symbol(so, ov::create_plugin_function))(plugin_impl); plugin = Plugin{plugin_impl, so}; } @@ -425,8 +425,8 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { plugin.set_name(deviceName); // Set Core class reference to plugins - std::weak_ptr mutableCore = - std::const_pointer_cast(shared_from_this()); + std::weak_ptr mutableCore = + 
std::const_pointer_cast(std::dynamic_pointer_cast(shared_from_this())); plugin.set_core(mutableCore); } @@ -472,9 +472,9 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { // for each such .0, .1, .# device to make sure plugin can handle different settings for different // device IDs for (auto pluginDesc : pluginRegistry) { - InferenceEngine::DeviceIDParser parser(pluginDesc.first); - if (pluginDesc.first.find(deviceName) != std::string::npos && !parser.getDeviceID().empty()) { - pluginDesc.second.defaultConfig[deviceKey] = parser.getDeviceID(); + ov::DeviceIDParser parser(pluginDesc.first); + if (pluginDesc.first.find(deviceName) != std::string::npos && !parser.get_device_id().empty()) { + pluginDesc.second.defaultConfig[deviceKey] = parser.get_device_id(); plugin.set_property(pluginDesc.second.defaultConfig); } } @@ -795,7 +795,7 @@ void ov::CoreImpl::apply_auto_batching(const std::shared_ptr& m if (pos == std::string::npos) return; // BATCH device is already configured via the config deviceNameWithBatchSize = deviceName.substr(pos + 1); - deviceNameWithoutBatch = InferenceEngine::DeviceIDParser::getBatchDevice(deviceNameWithBatchSize); + deviceNameWithoutBatch = ov::DeviceIDParser::get_batch_device(deviceNameWithBatchSize); // when user sets the BATCH device explicitly, we may check the dims less strictly // as the result is being checked by the user strictly_check_dims = false; @@ -982,8 +982,8 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st return; } - InferenceEngine::DeviceIDParser parser(deviceName); - std::string clearDeviceName = parser.getDeviceName(); + ov::DeviceIDParser parser(deviceName); + std::string clearDeviceName = parser.get_device_name(); std::vector> created_plugins; { @@ -1065,8 +1065,8 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st const std::string deviceKey = supportsConfigDeviceID ? 
CONFIG_KEY_INTERNAL(CONFIG_DEVICE_ID) : CONFIG_KEY(DEVICE_ID); - if (!parser.getDeviceID().empty()) { - configCopy[deviceKey] = parser.getDeviceID(); + if (!parser.get_device_id().empty()) { + configCopy[deviceKey] = parser.get_device_id(); } } plugin.second.set_property(configCopy); diff --git a/src/inference/src/dev/core_impl_ie.cpp b/src/inference/src/dev/core_impl_ie.cpp index cbf60f265ba209..a04d46081040a7 100644 --- a/src/inference/src/dev/core_impl_ie.cpp +++ b/src/inference/src/dev/core_impl_ie.cpp @@ -18,6 +18,7 @@ #include "ngraph/op/constant.hpp" #include "ngraph/pass/constant_folding.hpp" #include "openvino/itt.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/itensor.hpp" #include "openvino/util/common_util.hpp" @@ -231,25 +232,25 @@ std::map ov::CoreImpl::GetVersions(const if (deviceName.find("HETERO") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = InferenceEngine::DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); + deviceNames = ov::DeviceIDParser::get_hetero_devices(deviceName.substr(pos + 1)); } deviceNames.push_back("HETERO"); } else if (deviceName.find("MULTI") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1)); + deviceNames = ov::DeviceIDParser::get_multi_devices(deviceName.substr(pos + 1)); } deviceNames.push_back("MULTI"); } else if (deviceName.find("AUTO") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1)); + deviceNames = ov::DeviceIDParser::get_multi_devices(deviceName.substr(pos + 1)); } deviceNames.emplace_back("AUTO"); } else if (deviceName.find("BATCH") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = {InferenceEngine::DeviceIDParser::getBatchDevice(deviceName.substr(pos + 1))}; + deviceNames = {ov::DeviceIDParser::get_batch_device(deviceName.substr(pos + 1))}; } deviceNames.push_back("BATCH"); } else { @@ -258,8 +259,8 @@ std::map ov::CoreImpl::GetVersions(const } for (auto&& deviceName_ : deviceNames) { - ie::DeviceIDParser parser(deviceName_); - std::string deviceNameLocal = parser.getDeviceName(); + ov::DeviceIDParser parser(deviceName_); + std::string deviceNameLocal = parser.get_device_name(); ov::Plugin cppPlugin = get_plugin(deviceNameLocal); diff --git a/src/inference/src/dev/device_id_parser.cpp b/src/inference/src/dev/device_id_parser.cpp index 73309ee1cc9fc8..ba64a1cf08b831 100644 --- a/src/inference/src/dev/device_id_parser.cpp +++ b/src/inference/src/dev/device_id_parser.cpp @@ -2,47 +2,51 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ie_icore.hpp" +#include "openvino/runtime/device_id_parser.hpp" -namespace InferenceEngine { +#include + +namespace ov { DeviceIDParser::DeviceIDParser(const std::string& deviceNameWithID) { - deviceName = deviceNameWithID; + m_device_name = deviceNameWithID; - auto pos = deviceName.find('.'); + auto pos = m_device_name.find('.'); if (pos != std::string::npos) { - deviceName = deviceNameWithID.substr(0, pos); - deviceID = deviceNameWithID.substr(pos + 1, deviceNameWithID.size()); + m_device_name = deviceNameWithID.substr(0, pos); + m_device_id = deviceNameWithID.substr(pos + 1, deviceNameWithID.size()); } } -std::string DeviceIDParser::getDeviceID() const { - return 
deviceID; +const std::string& DeviceIDParser::get_device_id() const { + return m_device_id; } -std::string DeviceIDParser::getDeviceName() const { - return deviceName; +const std::string& DeviceIDParser::get_device_name() const { + return m_device_name; } -std::vector DeviceIDParser::getHeteroDevices(std::string fallbackDevice) { +std::vector DeviceIDParser::get_hetero_devices(const std::string& fallbackDevice) { std::vector deviceNames; + std::string fallback_dev = fallbackDevice; std::string cdevice; char delimiter = ','; size_t pos = 0; - while ((pos = fallbackDevice.find(delimiter)) != std::string::npos) { - deviceNames.push_back(fallbackDevice.substr(0, pos)); - fallbackDevice.erase(0, pos + 1); + while ((pos = fallback_dev.find(delimiter)) != std::string::npos) { + deviceNames.push_back(fallback_dev.substr(0, pos)); + fallback_dev.erase(0, pos + 1); } - if (!fallbackDevice.empty()) - deviceNames.push_back(fallbackDevice); + if (!fallback_dev.empty()) + deviceNames.push_back(fallback_dev); return deviceNames; } -std::vector DeviceIDParser::getMultiDevices(std::string devicesList) { +std::vector DeviceIDParser::get_multi_devices(const std::string& devicesList) { + std::string dev_list = devicesList; std::set deviceNames; auto trim_request_info = [](const std::string& device_with_requests) { auto opening_bracket = device_with_requests.find_first_of('('); @@ -53,37 +57,37 @@ std::vector DeviceIDParser::getMultiDevices(std::string devicesList size_t pos = 0; // in addition to the list of devices, every device can have a #requests in the brackets e.g. "CPU(100)" // we skip the #requests info here - while ((pos = devicesList.find(delimiter)) != std::string::npos) { - auto d = devicesList.substr(0, pos); + while ((pos = dev_list.find(delimiter)) != std::string::npos) { + auto d = dev_list.substr(0, pos); if (d.find("BATCH") == 0) { deviceNames.insert("BATCH"); auto p = d.find_first_of(":"); if (p != std::string::npos) - deviceNames.insert(DeviceIDParser::getBatchDevice(d.substr(p + 1))); + deviceNames.insert(DeviceIDParser::get_batch_device(d.substr(p + 1))); } else { deviceNames.insert(trim_request_info(d)); } - devicesList.erase(0, pos + 1); + dev_list.erase(0, pos + 1); } - if (!devicesList.empty()) { - if (devicesList.find("BATCH") == 0) { + if (!dev_list.empty()) { + if (dev_list.find("BATCH") == 0) { deviceNames.insert("BATCH"); - auto p = devicesList.find_first_of(":"); + auto p = dev_list.find_first_of(":"); if (p != std::string::npos) - deviceNames.insert(DeviceIDParser::getBatchDevice(devicesList.substr(p + 1))); + deviceNames.insert(DeviceIDParser::get_batch_device(dev_list.substr(p + 1))); } else { - deviceNames.insert(trim_request_info(devicesList)); + deviceNames.insert(trim_request_info(dev_list)); } } return std::vector(deviceNames.begin(), deviceNames.end()); } -std::string DeviceIDParser::getBatchDevice(std::string device) { +std::string DeviceIDParser::get_batch_device(const std::string& device) { auto trim_request_info = [](const std::string& device_with_requests) { auto opening_bracket = device_with_requests.find_first_of('('); return device_with_requests.substr(0, opening_bracket); }; return trim_request_info(device); } -} // namespace InferenceEngine +} // namespace ov diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index 799e284f58ba38..cc138a0f13d17a 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -40,6 +40,7 @@ #include "openvino/op/result.hpp" #include "openvino/runtime/compiled_model.hpp" #include 
"openvino/runtime/core.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" @@ -251,8 +252,8 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, } std::string deviceName_ = context->getDeviceName(); - DeviceIDParser device(deviceName_); - std::string deviceName = device.getDeviceName(); + ov::DeviceIDParser device(deviceName_); + std::string deviceName = device.get_device_name(); auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ov::any_copy(config)); auto exec = _impl->get_plugin(deviceName) @@ -350,8 +351,8 @@ void Core::RegisterPlugins(const std::string& xmlConfigFile) { } void Core::UnregisterPlugin(const std::string& deviceName_) { - DeviceIDParser parser(deviceName_); - std::string deviceName = parser.getDeviceName(); + ov::DeviceIDParser parser(deviceName_); + std::string deviceName = parser.get_device_name(); _impl->unload_plugin(deviceName); } diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp index 641de18db3ce38..aa05129cb46f10 100644 --- a/src/plugins/auto/plugin.cpp +++ b/src/plugins/auto/plugin.cpp @@ -17,6 +17,7 @@ #include #include #include "openvino/runtime/auto/properties.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "plugin.hpp" #include #include @@ -189,8 +190,8 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons } } - DeviceIDParser parsed{deviceName}; - std::string deviceid = parsed.getDeviceID(); + ov::DeviceIDParser parsed{deviceName}; + std::string deviceid = parsed.get_device_id(); std::vector sameTypeDevices; // if AUTO:GPU case, replace GPU with GPU.0 and GPU.1 // Disable AUTO:MYRIAD here because of below test case @@ -212,19 +213,19 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons } for (auto&& deviceNameWithID : sameTypeDevices) { - DeviceIDParser newParsed{deviceNameWithID}; + ov::DeviceIDParser newParsed{deviceNameWithID}; std::string defaultDeviceID = ""; std::string tempDeviceID = ""; - if (newParsed.getDeviceID().empty()) { + if (newParsed.get_device_id().empty()) { defaultDeviceID = getDefaultDeviceID(deviceNameWithID); tempDeviceID = defaultDeviceID; } else { - tempDeviceID = newParsed.getDeviceID(); + tempDeviceID = newParsed.get_device_id(); } std::string fullDeviceName = ""; std::string uniqueName = ""; - if (newParsed.getDeviceName() == "GPU") { + if (newParsed.get_device_name() == "GPU") { auto supportedMetrics = GetCore()->GetMetric(deviceNameWithID, METRIC_KEY(SUPPORTED_METRICS)).as>(); if (std::find(supportedMetrics.begin(), supportedMetrics.end(), METRIC_KEY(FULL_DEVICE_NAME)) != supportedMetrics.end()) { fullDeviceName = GetCore()->GetMetric(deviceNameWithID, METRIC_KEY(FULL_DEVICE_NAME)).as(); @@ -232,7 +233,7 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons } if (fullDeviceName.empty()) { - uniqueName = newParsed.getDeviceName() + "_" + tempDeviceID; + uniqueName = newParsed.get_device_name() + "_" + tempDeviceID; } else { uniqueName = fullDeviceName + "_" + tempDeviceID; } diff --git a/src/plugins/auto/utils/plugin_config.hpp b/src/plugins/auto/utils/plugin_config.hpp index 6677916f429c4e..f0221d471920d4 100644 --- a/src/plugins/auto/utils/plugin_config.hpp +++ b/src/plugins/auto/utils/plugin_config.hpp @@ -10,6 +10,7 @@ #include "ie_icore.hpp" #include "openvino/runtime/auto/properties.hpp" #include "log.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include #include #include @@ -199,7 +200,7 @@ class 
PluginConfig { if (realDevName.empty()) { return false; } - realDevName = DeviceIDParser(realDevName).getDeviceName(); + realDevName = ov::DeviceIDParser(realDevName).get_device_name(); std::string::size_type realEndPos = 0; if ((realEndPos = realDevName.find('(')) != std::string::npos) { realDevName = realDevName.substr(0, realEndPos); @@ -239,4 +240,4 @@ class PluginConfig { BaseValidator::Ptr device_property_validator; static const std::set _availableDevices; }; -} // namespace MultiDevicePlugin \ No newline at end of file +} // namespace MultiDevicePlugin diff --git a/src/plugins/auto_batch/src/auto_batch.cpp b/src/plugins/auto_batch/src/auto_batch.cpp index 5a35ee4385b77c..8dbaea000ecaad 100644 --- a/src/plugins/auto_batch/src/auto_batch.cpp +++ b/src/plugins/auto_batch/src/auto_batch.cpp @@ -19,6 +19,7 @@ #include "ie_ngraph_utils.hpp" #include "ie_performance_hints.hpp" #include "openvino/pass/manager.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include "transformations/init_node_info.hpp" @@ -692,8 +693,8 @@ DeviceInformation AutoBatchInferencePlugin::ParseBatchDevice(const std::string& DeviceInformation AutoBatchInferencePlugin::ParseMetaDevice(const std::string& devicesBatchCfg, const std::map& config) const { auto getDeviceConfig = [&](const DeviceName& deviceWithID) { - DeviceIDParser deviceParser(deviceWithID); - std::string deviceName = deviceParser.getDeviceName(); + ov::DeviceIDParser deviceParser(deviceWithID); + std::string deviceName = deviceParser.get_device_name(); std::map tconfig = mergeConfigs(_config, config); // passthrough the cache dir to core->loadnetwork when underlying device does not support cache dir auto deviceConfig = GetCore()->GetSupportedConfig(deviceWithID, tconfig); diff --git a/src/plugins/hetero/plugin.cpp b/src/plugins/hetero/plugin.cpp index 9152f7d8161818..10edbe72fe78c9 100644 --- a/src/plugins/hetero/plugin.cpp +++ b/src/plugins/hetero/plugin.cpp @@ -4,6 +4,7 @@ // clang-format off #include "ie_metric_helpers.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "plugin.hpp" #include #include @@ -95,7 +96,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork( Engine::DeviceMetaInformationMap Engine::GetDevicePlugins(const std::string& targetFallback, const Configs& localConfig) const { - auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(targetFallback); + auto fallbackDevices = ov::DeviceIDParser::get_hetero_devices(targetFallback); Engine::DeviceMetaInformationMap metaDevices; for (auto&& deviceName : fallbackDevices) { auto itPlugin = metaDevices.find(deviceName); @@ -140,7 +141,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const Configs } // WARNING: Here is devices with user set priority - auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(fallbackDevicesStr); + auto fallbackDevices = ov::DeviceIDParser::get_hetero_devices(fallbackDevicesStr); for (auto&& deviceName : fallbackDevices) { for (auto&& layerQueryResult : queryResults[deviceName].supportedLayersMap) { @@ -187,17 +188,18 @@ Parameter Engine::GetMetric(const std::string& name, const std::mapGetMetric(parser.getDeviceName(), METRIC_KEY(SUPPORTED_METRICS)).as>(); + auto supportedMetricKeys = GetCore() + ->GetMetric(parser.get_device_name(), METRIC_KEY(SUPPORTED_METRICS)) + .as>(); auto it = std::find(supportedMetricKeys.begin(), 
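            // Use the DEVICE_ARCHITECTURE metric when the device reports it;
            // otherwise fall back to the parsed device name below.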
supportedMetricKeys.end(), METRIC_KEY(DEVICE_ARCHITECTURE)); auto arch = (it != supportedMetricKeys.end()) ? GetCore()->GetMetric(device, METRIC_KEY(DEVICE_ARCHITECTURE)).as() - : parser.getDeviceName(); + : parser.get_device_name(); resArch += " " + arch; } return resArch; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index e68a9094f221dc..ddf75aefaafed5 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -32,6 +32,7 @@ #include "ie_plugin_config.hpp" #include "gpu/gpu_config.hpp" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "ie_icore.hpp" #include "dimension_tracker.hpp" @@ -207,7 +208,7 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine check_inputs(_networkInputs); auto context_impl = get_context_impl(context); - auto device_id = InferenceEngine::DeviceIDParser{context_impl->get_device_name()}.getDeviceID(); + auto device_id = ov::DeviceIDParser{context_impl->get_device_name()}.get_device_id(); OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); diff --git a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index ea7befd88f3426..66486df59a1ef1 100644 --- a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -9,6 +9,7 @@ #include +#include "openvino/runtime/device_id_parser.hpp" #include #include #include "shared_test_classes/base/layer_test_utils.hpp" @@ -121,7 +122,7 @@ void LayerTestsCommon::QueryNetwork() { ASSERT_EQ(res.second, ctx->getDeviceName()); } catch (...) { // otherwise, compare with originally used device name - ASSERT_EQ(InferenceEngine::DeviceIDParser(res.second).getDeviceName(), targetDevice); + ASSERT_EQ(ov::DeviceIDParser(res.second).get_device_name(), targetDevice); } actual.insert(res.first); } From 5af4a8e8d64edb71c85107324cbd516dfc1407d7 Mon Sep 17 00:00:00 2001 From: "Wang, Yang" Date: Tue, 21 Mar 2023 17:46:44 +0800 Subject: [PATCH 009/296] Take VPUX out of AUTO default candidate device list (#16037) * 1. Add device blacklist for AUTO plugin. 2. Update the logic to parse out the device candidate list from the inputting config MULTI_DEVICE_PRIORITIES. 3. Update the corresponding mock test cases. 4. Ignore the GTEST warning for the test cases. Signed-off-by: Wang, Yang * Update. * Update. * Update. * Add description about blacklist. * Apply suggestions from code review Update. Co-authored-by: yanlan song * Update. * Apply suggestions from code review Updated. Co-authored-by: yanlan song Co-authored-by: River Li * Update test case. * Update test case. * Update test case. * Update. * Update. 
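As an aside, a minimal usage sketch of the behavior this change targets. The
snippet is illustrative only and is not part of the patch: the API calls are
plain OpenVINO 2.0, "model.xml" is a placeholder path, and the expected
candidate lists are assumptions read off the updated unit tests below,
assuming CPU, GPU.0, GPU.1 and VPUX are the available devices.

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // placeholder model
        // No device priority: the blocklist applies, VPUX (and GNA) are
        // dropped, so AUTO's candidate list resolves to "CPU,GPU.0,GPU.1".
        auto compiled_default = core.compile_model(model, "AUTO");
        // An explicit priority list bypasses the blocklist, so VPUX stays in.
        auto compiled_explicit =
            core.compile_model(model, "AUTO", ov::device::priorities("CPU,GPU,VPUX"));
        // A "-" entry only removes a device; with no valid device left in the
        // priority string the blocklist applies again, giving "CPU,GPU.1".
        auto compiled_deny =
            core.compile_model(model, "AUTO", ov::device::priorities("-GPU.0"));
        return 0;
    }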
---------

Signed-off-by: Wang, Yang
Co-authored-by: yanlan song
Co-authored-by: River Li
Co-authored-by: Shen, Wanglei
---
 src/plugins/auto/plugin.cpp              | 139 ++++++++++++-----
 src/plugins/auto/plugin_config.cpp       |   4 +
 src/plugins/auto/utils/plugin_config.hpp |  15 +--
 src/tests/unit/auto/get_device_list.cpp  | 117 ++++++++++++-------
 4 files changed, 163 insertions(+), 112 deletions(-)

diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp
index aa05129cb46f10..165f5c3db6cf3c 100644
--- a/src/plugins/auto/plugin.cpp
+++ b/src/plugins/auto/plugin.cpp
@@ -850,83 +850,92 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
     auto deviceList = GetCore()->GetAvailableDevices();
     auto deviceListConfig = config.find(ov::device::priorities.name());
-    if (deviceListConfig->second.empty()) {
-        for (auto&& device : deviceList) {
-            // filter out the supported devices
-            if (!_pluginConfig.isSupportedDevice(device))
-                continue;
-            allDevices += device + ",";
-        }
-    } else {
+    for (auto&& device : deviceList) {
+        // keep only the supported devices
+        if (!_pluginConfig.isSupportedDevice(device))
+            continue;
+        allDevices += device + ",";
+    }
+    std::vector<std::string> devicesMerged;
+    if (deviceListConfig != config.end() && !deviceListConfig->second.empty()) {
         auto priorities = deviceListConfig->second;
         // parsing the string and splitting the comma-separated tokens
-        std::vector<std::string> deviceVec = _pluginConfig.ParsePrioritiesDevices(priorities);
-        std::vector<std::string> devicesToBeDeleted;
-        auto updateDeviceVec = [&](const std::string& delPattern = "") {
-            auto iter = deviceVec.begin();
-            while (iter != deviceVec.end()) {
-                if (delPattern.empty()) {
-                    if ((*iter).find("-") == 0) {
-                        devicesToBeDeleted.push_back((*iter).erase(0, 1));
-                        iter = deviceVec.erase(iter);
-                    } else {
-                        iter++;
-                    }
-                } else {
-                    if ((*iter).find(delPattern) != std::string::npos)
-                        iter = deviceVec.erase(iter);
-                    else
-                        iter++;
-                }
-            }
+        std::vector<std::string> devicesToBeMerged = _pluginConfig.ParsePrioritiesDevices(priorities);
+        std::vector<std::string> devicesToBeDeleted(devicesToBeMerged.size());
+        const auto& iterDel = std::copy_if(devicesToBeMerged.begin(),
+                                           devicesToBeMerged.end(),
+                                           devicesToBeDeleted.begin(),
+                                           [](const std::string& item) {
+                                               return item.front() == '-';
+                                           });
+        devicesToBeDeleted.resize(std::distance(devicesToBeDeleted.begin(), iterDel));
+        const auto& iterMerge =
+            std::remove_if(devicesToBeMerged.begin(), devicesToBeMerged.end(), [](const std::string& item) {
+                return item.front() == '-';
+            });
+        devicesToBeMerged.resize(std::distance(devicesToBeMerged.begin(), iterMerge));
+        for (auto&& device : devicesToBeDeleted)
+            LOG_INFO_TAG("remove %s from device candidate list", device.c_str());
+        auto isAnyDev = [](std::string& device, const std::vector<std::string>& devices) {
+            auto iter = std::find_if(devices.begin(), devices.end(), [device](const std::string& devItem) {
+                return devItem.find(device) != std::string::npos;
+            });
+            return iter != devices.end();
         };
-        updateDeviceVec();
-        if (devicesToBeDeleted.size() == 0) {
-            allDevices = deviceListConfig->second;
+        auto deviceWithDefaultID = [](std::string& device) {
+            // AUTO assumes the default device ID is "0" for a single device.
+            return device.find(".") == std::string::npos ? device + ".0" : device;
+        };
+        if (devicesToBeMerged.empty()) {
+            for (auto&& device : deviceList) {
+                if (isAnyDev(device, devicesToBeDeleted) || !_pluginConfig.isSupportedDevice(device))
+                    continue;
+                devicesMerged.push_back(device);
+            }
         } else {
-            auto deviceNeedToMerge = [&](const std::string& devicename) {
-                for (auto&& iter : devicesToBeDeleted) {
-                    if (iter.find(devicename) != std::string::npos)
-                        return true;
-                }
-                return false;
-            };
-            auto mergeDeviceList = [&]() {
-                std::vector<std::string> mergedList;
-                auto prevSize = mergedList.size();
-                for (auto&& iter : deviceVec) {
-                    for (auto&& viter : deviceList) {
-                        if (viter.find(iter) != std::string::npos && deviceNeedToMerge(iter))
-                            mergedList.push_back(std::move(viter));
+            for (auto&& device : devicesToBeMerged) {
+                if (!isAnyDev(device, deviceList)) {
+                    DeviceIDParser parsed{device};
+                    auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), parsed.getDeviceName());
+                    if (iter != devicesMerged.end() && parsed.getDeviceName() != device && parsed.getDeviceID() == "0")
+                        // The device has the default device ID (eg. GPU.0) and its
+                        // wide name (eg. GPU) is already in the device candidate list.
+                        continue;
+                    // Add the user-specified device to the candidate list
+                    devicesMerged.push_back(device);
+                } else {
+                    // Update the device name if a supported device with this id exists
+                    for (auto&& item : deviceList) {
+                        auto realDevice = deviceWithDefaultID(item);
+                        if (isAnyDev(realDevice, devicesToBeDeleted) || item.find(device) == std::string::npos)
+                            continue;
+                        auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), deviceWithDefaultID(item));
+                        // Remove the device with the default device id from the candidate device list (eg. GPU.0)
+                        // if its wide name is a single device (eg. GPU).
+                        DeviceIDParser parsed{item};
+                        if (parsed.getDeviceName() == item && iter != devicesMerged.end())
+                            devicesMerged.erase(iter);
+                        // continue if the target device is already in the candidate device list.
+                        if (std::find(devicesMerged.begin(), devicesMerged.end(), item) != devicesMerged.end())
+                            continue;
+                        devicesMerged.push_back(item);
                     }
-                    // if virtual devices or mock devices
-                    if (mergedList.size() == prevSize)
-                        mergedList.push_back(std::move(iter));
-                    prevSize = mergedList.size();
                 }
-                return mergedList;
-            };
-
-            deviceVec = deviceVec.size() == 0 ? deviceList : mergeDeviceList();
-            for (auto& iter : devicesToBeDeleted) {
-                LOG_INFO_TAG("remove %s from device candidate list", iter.c_str());
-                updateDeviceVec(iter);
-            }
-            for (auto&& device : deviceVec) {
-                if (!_pluginConfig.isSupportedDevice(device))
-                    continue;
-                allDevices += device + ",";
-            }
+            }
         }
     }
-
-    // remove the last ',' if exist
-    if (allDevices.back() == ',')
-        allDevices.pop_back();
-
+    if (devicesMerged.size()) {
+        allDevices.clear();
+        std::for_each(devicesMerged.begin(), devicesMerged.end(), [&allDevices](const std::string& device) {
+            allDevices += device + ",";
+        });
+    }
     if (allDevices.empty()) {
         IE_THROW() << "Please, check environment due to no supported devices can be used";
     }
+    // remove the last ',' if it exists
+    if (allDevices.back() == ',')
+        allDevices.pop_back();
     return allDevices;
 }

diff --git a/src/plugins/auto/plugin_config.cpp b/src/plugins/auto/plugin_config.cpp
index b344585a06c096..7fffa536a5b566 100644
--- a/src/plugins/auto/plugin_config.cpp
+++ b/src/plugins/auto/plugin_config.cpp
@@ -5,6 +5,10 @@ namespace MultiDevicePlugin {
 const std::set<std::string> PluginConfig::_availableDevices = {"AUTO", "CPU", "GPU", "TEMPLATE", "NVIDIA", "VPUX", "MULTI", "HETERO", "mock"};
+// AUTO will enable the blocklist if
+// 1. No device priority is passed to AUTO/MULTI (eg. core.compile_model(model, "AUTO", configs);).
+// 2. No valid device is parsed out of the device priority (eg. core.compile_model(model, "AUTO:-CPU,-GPU", configs);).
+const std::set<std::string> PluginConfig::_deviceBlocklist = {"VPUX", "GNA"};

 PluginConfig::PluginConfig() {
     set_default();

diff --git a/src/plugins/auto/utils/plugin_config.hpp b/src/plugins/auto/utils/plugin_config.hpp
index f0221d471920d4..243e1800199c19 100644
--- a/src/plugins/auto/utils/plugin_config.hpp
+++ b/src/plugins/auto/utils/plugin_config.hpp
@@ -205,7 +205,7 @@ class PluginConfig {
         if ((realEndPos = realDevName.find('(')) != std::string::npos) {
             realDevName = realDevName.substr(0, realEndPos);
         }
-        if (_availableDevices.end() == std::find(_availableDevices.begin(), _availableDevices.end(), realDevName)) {
+        if (_deviceBlocklist.end() != std::find(_deviceBlocklist.begin(), _deviceBlocklist.end(), realDevName)) {
             return false;
         }
         return true;
@@ -217,17 +217,13 @@ class PluginConfig {
         std::string::size_type endpos = 0;
         while ((endpos = priorities.find(separator, pos)) != std::string::npos) {
             auto subStr = priorities.substr(pos, endpos - pos);
-            if (!isSupportedDevice(subStr)) {
-                IE_THROW() << "Unavailable device name: " << subStr;
-            }
-            devices.push_back(subStr);
+            if (!subStr.empty())
+                devices.push_back(subStr);
             pos = endpos + 1;
         }
         auto subStr = priorities.substr(pos, priorities.length() - pos);
-        if (!isSupportedDevice(subStr)) {
-            IE_THROW() << "Unavailable device name: " << subStr;
-        }
-        devices.push_back(subStr);
+        if (!subStr.empty())
+            devices.push_back(subStr);
         return devices;
     }

@@ -239,5 +235,6 @@ class PluginConfig {
     std::map property_validators;
     BaseValidator::Ptr device_property_validator;
     static const std::set<std::string> _availableDevices;
+    static const std::set<std::string> _deviceBlocklist;
 };
 }  // namespace MultiDevicePlugin

diff --git a/src/tests/unit/auto/get_device_list.cpp b/src/tests/unit/auto/get_device_list.cpp
index 73e0a8938e3131..27456ffcd99368 100644
--- a/src/tests/unit/auto/get_device_list.cpp
+++ b/src/tests/unit/auto/get_device_list.cpp
@@ -23,6 +23,7 @@
 using ::testing::Property;
 using ::testing::Eq;
 using ::testing::AnyNumber;
 using ::testing::ReturnRef;
+using ::testing::NiceMock;
 using ::testing::AtLeast;
 using ::testing::InvokeWithoutArgs;
 using Config = std::map<std::string, std::string>;
@@ -33,24 +34,35 @@
 const char igpuFullDeviceName[] = "Intel(R) Gen9 HD Graphics (iGPU)";
 const char dgpuFullDeviceName[] = "Intel(R) Iris(R) Xe MAX Graphics (dGPU)";
 // const char myriadFullDeviceName[] = "Intel Movidius Myriad X VPU";
 // const char vpuxFullDeviceName[] = "";
-const std::vector<std::string> availableDevs = {"CPU", "GPU.0", "GPU.1", "VPUX", "UNSUPPORTED_DEVICE"};
+const std::vector<std::string> availableDevs = {"CPU", "GPU", "VPUX"};
+const std::vector<std::string> availableDevsWithId = {"CPU", "GPU.0", "GPU.1", "VPUX"};
+using Params = std::tuple<std::string, std::string>;
 using ConfigParams = std::tuple<
-    std::string,                 // Priority devices
-    std::string                  // expect metaDevices
+    std::vector<std::string>,    // Available devices retrieved from Core
+    Params                       // Params {devicePriority, expect metaDevices}
 >;
 class GetDeviceListTest : public ::testing::TestWithParam<ConfigParams> {
 public:
-    std::shared_ptr<MockICore> core;
-    std::shared_ptr<MockMultiDeviceInferencePlugin> plugin;
+    std::shared_ptr<NiceMock<MockICore>> core;
+    std::shared_ptr<NiceMock<MockMultiDeviceInferencePlugin>> plugin;

 public:
     static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
+        Params priorityAndMetaDev;
         std::string priorityDevices;
         std::string metaDevices;
-        std::tie(priorityDevices, metaDevices) = obj.param;
+        std::vector<std::string> availableDevices;
+        std::tie(availableDevices, priorityAndMetaDev) = obj.param;
+        std::tie(priorityDevices, metaDevices) = priorityAndMetaDev;
         std::ostringstream result;
         result << "priorityDevices_" << priorityDevices;
         result << "_expectedDevices" << metaDevices;
+        result << "_availableDevicesList";
+        std::string devicesStr;
+        for (auto&& device : availableDevices) {
+            devicesStr += "_" + device;
+        }
+        result << devicesStr;
         return result.str();
     }

@@ -61,15 +73,12 @@ class GetDeviceListTest : public ::testing::TestWithParam<ConfigParams> {
     void SetUp() override {
         // prepare mockicore and cnnNetwork for loading
-        core = std::shared_ptr<MockICore>(new MockICore());
-        auto* origin_plugin = new MockMultiDeviceInferencePlugin();
-        plugin = std::shared_ptr<MockMultiDeviceInferencePlugin>(origin_plugin);
+        core = std::shared_ptr<NiceMock<MockICore>>(new NiceMock<MockICore>());
+        auto* origin_plugin = new NiceMock<MockMultiDeviceInferencePlugin>();
+        plugin = std::shared_ptr<NiceMock<MockMultiDeviceInferencePlugin>>(origin_plugin);
         // replace core with mock Icore
         plugin->SetCore(core);
-
-        ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
-
         ON_CALL(*plugin, GetDeviceList).WillByDefault([this](const std::map<std::string, std::string>& config) {
             return plugin->MultiDeviceInferencePlugin::GetDeviceList(config);
@@ -79,40 +88,72 @@ class GetDeviceListTest : public ::testing::TestWithParam<ConfigParams> {

 TEST_P(GetDeviceListTest, GetDeviceListTestWithExcludeList) {
     // get Parameter
+    Params priorityAndMetaDev;
     std::string priorityDevices;
     std::string metaDevices;
-    std::tie(priorityDevices, metaDevices) = this->GetParam();
+    std::vector<std::string> availableDevs;
+    std::tie(availableDevs, priorityAndMetaDev) = this->GetParam();
+    std::tie(priorityDevices, metaDevices) = priorityAndMetaDev;

-    //EXPECT_CALL(*plugin, GetDeviceList(_)).Times(1);
+    ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
     EXPECT_CALL(*core, GetAvailableDevices()).Times(1);
     auto result = plugin->GetDeviceList({{ov::device::priorities.name(), priorityDevices}});
     EXPECT_EQ(result, metaDevices);
 }
-
-// ConfigParams details
-// example
-// ConfigParams {devicePriority, expect metaDevices, ifThrowException}
-
-const std::vector<ConfigParams> testConfigs = {
-    //
-    ConfigParams {"CPU,GPU,VPUX",
-                  "CPU,GPU,VPUX"},
-    ConfigParams {"VPUX,GPU,CPU,-GPU.0",
-                  "VPUX,GPU.1,CPU"},
-    ConfigParams {"-GPU.0,GPU,CPU",
-                  "GPU.1,CPU"},
-    ConfigParams {"-GPU.0,GPU",
-                  "GPU.1"},
-    ConfigParams {"-GPU.0", "CPU,GPU.1,VPUX"},
-    ConfigParams {"-GPU.0,-GPU.1", "CPU,VPUX"},
-    ConfigParams {"-GPU.0,-CPU", "GPU.1,VPUX"}
-};
-
-INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, GetDeviceListTest,
-                         ::testing::ValuesIn(testConfigs),
-                         GetDeviceListTest::getTestCaseName);
+const std::vector<Params> testConfigsWithId = {Params{" ", " "},
+                                               Params{"", "CPU,GPU.0,GPU.1"},
+                                               Params{"CPU, ", "CPU, "},
+                                               Params{" ,CPU", " ,CPU"},
+                                               Params{"CPU,", "CPU"},
+                                               Params{"CPU,,GPU", "CPU,GPU.0,GPU.1"},
+                                               Params{"CPU, ,GPU", "CPU, ,GPU.0,GPU.1"},
+                                               Params{"CPU,GPU,GPU.1", "CPU,GPU.0,GPU.1"},
+                                               Params{"CPU,GPU,VPUX,INVALID_DEVICE", "CPU,GPU.0,GPU.1,VPUX,INVALID_DEVICE"},
+                                               Params{"VPUX,GPU,CPU,-GPU.0", "VPUX,GPU.1,CPU"},
+                                               Params{"-GPU.0,GPU,CPU", "GPU.1,CPU"},
+                                               Params{"-GPU.0,GPU", "GPU.1"},
+                                               Params{"-GPU,GPU.0", "GPU.0"},
+                                               Params{"-GPU.0", "CPU,GPU.1"},
+                                               Params{"-GPU.0,-GPU.1", "CPU"},
+                                               Params{"-GPU.0,-GPU.1,INVALID_DEVICE", "INVALID_DEVICE"},
+                                               Params{"-GPU.0,-GPU.1,-INVALID_DEVICE", "CPU"},
+                                               Params{"-GPU.0,-CPU", "GPU.1"}};
+
+const std::vector<Params> testConfigs = {Params{" ", " "},
+                                         Params{"", "CPU,GPU"},
+                                         Params{"GPU", "GPU"},
+                                         Params{"GPU.0", "GPU.0"},
+                                         Params{"GPU,GPU.0", "GPU"},
+                                         Params{"CPU", "CPU"},
+                                         Params{" ,CPU", " ,CPU"},
+                                         Params{" ,GPU", " ,GPU"},
+                                         Params{"GPU, ", "GPU, "},
+                                         Params{"CPU,GPU", "CPU,GPU"},
+                                         Params{"CPU,-GPU", "CPU"},
+                                         Params{"CPU,-GPU,GPU.0", "CPU,GPU.0"},
+                                         Params{"CPU,-GPU,GPU.1", "CPU,GPU.1"},
+                                         Params{"CPU,GPU,-GPU.0", "CPU"},
+                                         Params{"CPU,GPU,-GPU.1", "CPU,GPU"},
+                                         Params{"CPU,GPU.0,GPU", "CPU,GPU"},
+                                         Params{"CPU,GPU,GPU.0", "CPU,GPU"},
+                                         Params{"CPU,GPU,GPU.1", "CPU,GPU,GPU.1"},
+                                         Params{"CPU,GPU.1,GPU", "CPU,GPU.1,GPU"},
+                                         Params{"CPU,VPUX", "CPU,VPUX"},
+                                         Params{"CPU,-VPUX", "CPU"},
+                                         Params{"CPU,-INVALID_DEVICE", "CPU"},
+                                         Params{"CPU,GPU,VPUX", "CPU,GPU,VPUX"}};
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests_GetDeviceListWithID,
+                         GetDeviceListTest,
+                         ::testing::Combine(::testing::Values(availableDevsWithId),
+                                            ::testing::ValuesIn(testConfigsWithId)),
+                         GetDeviceListTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests_GetDeviceList,
+                         GetDeviceListTest,
+                         ::testing::Combine(::testing::Values(availableDevs), ::testing::ValuesIn(testConfigs)),
+                         GetDeviceListTest::getTestCaseName);

 //toDo need add test for ParseMetaDevices(_, config) to check device config of
 //return metaDevices

From 5d6cd626bc91595e0b1d12181d604216de43fca1 Mon Sep 17 00:00:00 2001
From: "Min, Byungil"
Date: Tue, 21 Mar 2023 18:55:06 +0900
Subject: [PATCH 010/296] Fix unit test on dGPU (#16295)

* Resolve failed cases and queue-type issue

+ Resolved out_of_order queue-type issue
+ Added get_test_default_config for setting default config of onednn
+ Cleared failed case

Signed-off-by: Min, Byungil
Co-authored-by: tuxedcat
---
 .../dynamic_execution/memory_realloc_test.cpp | 2 +-
 .../tests/fusions/activation_fusion_test.cpp | 2 +-
 .../tests/fusions/concatenate_fusion_test.cpp | 10 +-
 .../tests/fusions/convolution_fusion_test.cpp | 4 +-
 .../tests/fusions/fusion_test_common.hpp | 7 +-
 .../tests/fusions/gemm_fusion_test.cpp | 1 -
 .../tests/fusions/lrn_fusion_test.cpp | 2 +-
 .../tests/fusions/pooling_fusion_test.cpp | 13 +-
 .../graph_manipulation_gpu_test.cpp | 6 +-
 .../intel_gpu/tests/passes/handle_reshape.cpp | 2 +-
 .../passes/prepare_buffer_fusing_test.cpp | 4 +-
 .../passes/prepare_primitive_fusing_test.cpp | 18 +-
 .../remove_redundant_reorders_tests.cpp | 2 +-
 .../tests/passes/reorder_inputs_test.cpp | 10 +-
 .../passes/select_preferred_formats_test.cpp | 2 +-
 .../passes/test_module_fusing_reorder.cpp | 33 +++-
 .../tests/shape_infer/broadcast_si_test.cpp | 7 +-
 .../test_cases/activation_simple_gpu_test.cpp | 60 +++---
 .../adaptive_avg_pooling_gpu_test.cpp | 2 +-
 .../adaptive_max_pooling_gpu_test.cpp | 4 +-
 .../test_cases/add_reorders_gpu_test.cpp | 4 +-
 .../tests/test_cases/arg_max_gpu_test.cpp | 24 +--
 .../tests/test_cases/barriers_test.cpp | 8 +-
 .../test_cases/batch_to_space_gpu_test.cpp | 28 +--
 .../binary_convolution_gpu_test.cpp | 6 +-
 .../tests/test_cases/border_gpu_test.cpp | 46 ++---
 .../tests/test_cases/broadcast_gpu_test.cpp | 7 +-
 .../tests/test_cases/bucketize_gpu_test.cpp | 2 +-
 .../tests/test_cases/cl_mem_input_test.cpp | 4 +-
 .../tests/test_cases/command_queue_test.cpp | 44 ++++-
 .../test_cases/concatenation_gpu_test.cpp | 38 ++--
 .../tests/test_cases/condition_gpu_test.cpp | 18 +-
 .../test_cases/convert_color_gpu_test.cpp | 20 +-
 .../tests/test_cases/convolution_gpu_test.cpp | 187 +++++++++---------
 .../tests/test_cases/crop_gpu_test.cpp | 58 +++---
 .../tests/test_cases/ctc_loss_gpu_test.cpp | 2 +-
 .../tests/test_cases/cum_sum_gpu_test.cpp | 6 +-
 .../test_cases/custom_gpu_primitive_test.cpp | 12 +-
 .../test_cases/deconvolution_gpu_test.cpp | 77 ++++----
 .../test_cases/depth_concatenate_gpu_test.cpp | 38 ++--
 .../test_cases/depth_to_space_gpu_test.cpp | 16 +-
 .../test_cases/detection_output_test.cpp | 26 +--
 .../tests/test_cases/dft_gpu_test.cpp | 6 +-
 .../tests/test_cases/eltwise_gpu_test.cpp | 126 ++++++------
 .../test_cases/embedding_bag_gpu_test.cpp | 34 ++--
 .../test_cases/empty_tensor_gpu_test.cpp | 2 +-
 ...al_detectron_detection_output_gpu_test.cpp | 6 +-
 ...nerate_proposals_single_image_gpu_test.cpp | 4 +-
 ...etectron_prior_grid_generator_gpu_test.cpp | 2 +-
 ...tectron_roi_feature_extractor_gpu_test.cpp | 6 +-
 ...erimental_detectron_topk_rois_gpu_test.cpp | 6 +-
 .../extract_image_patches_gpu_test.cpp | 14 +-
 .../intel_gpu/tests/test_cases/eye.cpp | 2 +-
 .../test_cases/fully_connected_gpu_test.cpp | 52 ++---
 .../test_cases/gather_elements_gpu_test.cpp | 4 +-
 .../tests/test_cases/gather_gpu_test.cpp | 54 ++---
 .../tests/test_cases/gather_nd_gpu_test.cpp | 2 +-
 .../tests/test_cases/gather_tree_gpu_test.cpp | 2 +-
 .../tests/test_cases/gemm_gpu_test.cpp | 16 +-
 .../generate_proposals_gpu_test.cpp | 13 +-
 .../tests/test_cases/grid_sample_gpu_test.cpp | 2 +-
 .../tests/test_cases/hash_key_gpu_test.cpp | 18 +-
 .../tests/test_cases/loop_gpu_test.cpp | 6 +-
 .../tests/test_cases/lrn_gpu_test.cpp | 8 +-
 .../test_cases/lstm_dynamic_gpu_test.cpp | 14 +-
 .../tests/test_cases/lstm_gpu_test.cpp | 18 +-
 .../tests/test_cases/matrix_nms_gpu_test.cpp | 2 +-
 .../tests/test_cases/memory_test.cpp | 22 ++-
 .../test_cases/multiclass_nms_gpu_test.cpp | 4 +-
 .../test_cases/multiple_streams_gpu_test.cpp | 2 +-
 .../tests/test_cases/mvn_gpu_test.cpp | 38 ++--
 .../test_cases/non_max_suppression_test.cpp | 18 +-
 .../tests/test_cases/non_zero_gpu_test.cpp | 10 +-
 .../tests/test_cases/normalizel2_gpu_test.cpp | 2 +-
 .../tests/test_cases/one_hot_gpu_test.cpp | 26 +--
 .../tests/test_cases/permute_gpu_test.cpp | 62 +++---
 .../tests/test_cases/pooling_gpu_test.cpp | 108 +++++-----
 .../tests/test_cases/prior_box_gpu_test.cpp | 2 +-
 .../propagate_constants_gpu_test.cpp | 2 +-
 .../test_cases/pyramid_roi_align_gpu_test.cpp | 2 +-
 .../tests/test_cases/quantize_gpu_test.cpp | 20 +-
 .../test_cases/random_uniform_gpu_test.cpp | 2 +-
 .../tests/test_cases/range_gpu_test.cpp | 14 +-
 .../tests/test_cases/reduce_gpu_test.cpp | 70 +++----
 .../tests/test_cases/region_yolo_gpu_test.cpp | 2 +-
 .../test_cases/removing_output_node_test.cpp | 4 +-
 .../tests/test_cases/reorder_gpu_test.cpp | 151 +++++++-------
 .../tests/test_cases/reorg_yolo_gpu_test.cpp | 2 +-
 .../tests/test_cases/resample_gpu_test.cpp | 57 +++---
 .../tests/test_cases/reshape_gpu_test.cpp | 22 +--
 .../tests/test_cases/reverse_gpu_test.cpp | 2 +-
 .../test_cases/reverse_sequence_gpu_test.cpp | 26 +--
 .../tests/test_cases/roi_align_gpu_test.cpp | 3 +-
 .../tests/test_cases/roi_pooling_gpu_test.cpp | 2 +-
 .../tests/test_cases/roll_gpu_test.cpp | 2 +-
 .../scatter_elements_update_gpu_test.cpp | 4 +-
 .../test_cases/scatter_nd_update_gpu_test.cpp | 72 +++----
 .../test_cases/scatter_update_gpu_test.cpp | 32 +--
 .../tests/test_cases/select_gpu_test.cpp | 82 ++++----
 .../test_cases/set_output_memory_gpu_test.cpp | 14 +-
 .../tests/test_cases/shape_of_gpu_test.cpp | 10 +-
 .../test_cases/shuffle_channels_test.cpp | 18 +-
 .../intel_gpu/tests/test_cases/slice.cpp | 2 +-
 .../tests/test_cases/softmax_gpu_test.cpp | 22 +--
 .../test_cases/space_to_batch_gpu_test.cpp | 28 +--
 .../test_cases/space_to_depth_gpu_test.cpp | 36 ++--
 .../spatial_concatenate_gpu_test.cpp | 22 +--
 .../tests/test_cases/split_gpu_test.cpp | 12 +-
 .../tests/test_cases/streams_test.cpp | 10 +-
 .../test_cases/strided_slice_gpu_test.cpp | 64 +++---
 .../test_device_mem_usage_estimation.cpp | 8 +-
 .../tests/test_cases/tile_gpu_test.cpp | 16 +-
 .../test_cases/trim_to_outputs_gpu_test.cpp | 6 +-
 .../intel_gpu/tests/test_cases/variable.cpp | 6 +-
 .../intel_gpu/tests/test_utils/test_utils.cpp | 35 +++-
 .../intel_gpu/tests/test_utils/test_utils.h | 9 +
 116 files changed, 1270 insertions(+), 1196 deletions(-)

diff --git a/src/plugins/intel_gpu/tests/dynamic_execution/memory_realloc_test.cpp b/src/plugins/intel_gpu/tests/dynamic_execution/memory_realloc_test.cpp
index 891c4d7913ba03..682b9dc68c5646 100644
---
a/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp @@ -42,12 +42,14 @@ class ConcatOneDNNFusingTest : public ::BaseFusingTest { ov::intel_gpu::ImplementationDesc cldnn_impl = { p.input_format, "", impl_types::ocl }; // for onednn fusing test, topology_non_fused means cldnn, topology_fused is onednn - ExecutionConfig cldnn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ExecutionConfig cldnn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", cldnn_impl } })}; - ExecutionConfig onednn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", cldnn_impl } })}); + ExecutionConfig onednn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", onednn_impl } })}; + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", onednn_impl } })}); network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_cfg); network network_fused_onednn(this->engine, this->topology_fused, onednn_cfg); diff --git a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp index d88a740f441c80..1f4cef03037989 100644 --- a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp @@ -208,7 +208,7 @@ class ConvFusingForceKernelTest : public BaseFusingTest public: void execute(bc_force_kernel_params& p) { auto input_prim = get_mem(get_input_layout(p)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { p.input_format, p.kernel_name }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); @@ -4178,8 +4178,6 @@ class PermuteOptimizingTestOnednn : public BaseFusingTestengine, this->topology_non_fused, cfg_not_fused); network network_fused(this->engine, this->topology_fused, cfg_fused); diff --git a/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp b/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp index 78a5781e93bf85..e50df802bb5fd4 100644 --- a/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp +++ b/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp @@ -31,13 +31,12 @@ class BaseFusingTest : public ::testing::TestWithParam { static const int max_random = 200; void SetUp() override { + cfg_fused = get_test_default_config(engine); + cfg_not_fused = get_test_default_config(engine); + cfg_fused.set_property(ov::intel_gpu::optimize_data(true)); cfg_not_fused.set_property(ov::intel_gpu::optimize_data(false)); cfg_not_fused.set_property(ov::intel_gpu::allow_static_input_reorder(true)); - if (engine.get_device_info().supports_immad) { - cfg_fused.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - cfg_not_fused.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } } void compare(network& not_fused, network& fused, T& p, bool count_reorder = false) { diff --git 
a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp index 5e7ab52c861e09..34b35f26c054ef 100644 --- a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp @@ -288,7 +288,6 @@ TEST_P(gemm_2in_add, eltwise_postop) { if (engine.get_device_info().supports_immad) { ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::onednn }; cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemmv_impl } })); - cfg_fused.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); } auto add_data_layout = get_output_layout(p); diff --git a/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp index b4d0a522ae1fed..9b35647f3c6f89 100644 --- a/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp @@ -35,7 +35,7 @@ class LrnFusingTest : public ::BaseFusingTest { void execute(lrn_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc lrn_impl = { p.input_format, p.kernel_name }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "lrn_norm", lrn_impl } })); diff --git a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp index ca58be33337166..7c99523050cea8 100644 --- a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp @@ -35,8 +35,9 @@ class PoolingFusingTest : public ::BaseFusingTest { void execute(pooling_test_params& p) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives = p.expected_fused_primitives_onednn; + auto input_prim = get_mem(get_input_layout(p)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); if (!p.kernel_name.empty()) { ov::intel_gpu::ImplementationDesc impl = { p.input_format, p.kernel_name }; @@ -540,12 +541,14 @@ class PoolingOneDNNFusingTest : public ::BaseFusingTest { ov::intel_gpu::ImplementationDesc onednn_impl = { p.input_format, "", impl_types::onednn }; ov::intel_gpu::ImplementationDesc cldnn_impl = { p.input_format, "", impl_types::ocl }; - ExecutionConfig cldnn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ExecutionConfig cldnn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", cldnn_impl } })}; - ExecutionConfig onednn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", cldnn_impl } })}); + ExecutionConfig onednn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", onednn_impl } })}; + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", onednn_impl } })}); // for onednn fusing test, topology_non_fused means cldnn, topology_fused 
is onednn network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_cfg); diff --git a/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp b/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp index 52adccb6f97a71..7cad52b8497f73 100644 --- a/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp @@ -28,7 +28,7 @@ using namespace ::tests; in similar way as it is done in tests utilizing clDNN API */ TEST(basic, test1) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); @@ -67,9 +67,9 @@ TEST(basic, test1) { // Thus, a single method from program like add_intermediate might be tested separately. TEST(add_intermediate_gpu, test1) { - ExecutionConfig config; topology topology; auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {2, 2, 2, 2} }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, {2, 2, 2, 2} }); @@ -124,9 +124,9 @@ TEST(add_intermediate_gpu, test1) // Disabled for now as it produces wrong results TEST(add_intermediate_gpu, test2) { - ExecutionConfig config; topology topology; auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); diff --git a/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp b/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp index d1d9b9a592ed54..7634b0347112d8 100644 --- a/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp +++ b/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp @@ -35,7 +35,7 @@ TEST(handle_reshape, dont_remove_reshape_that_changes_rank) { topology.add(reshape("reshape", input_info("e1"), false, {1}, {1})); topology.add(eltwise("e2", input_info("reshape"), input_info("data1"), eltwise_mode::sum)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); diff --git a/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp index eab4f99a3c7217..0a5358d19fdccc 100644 --- a/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp @@ -34,7 +34,7 @@ TEST(prepare_buffer_fusing, optimize_reshape) { topology.add(permute("permute2", input_info("reshape"), {0, 3, 2, 1})); topology.add(reorder("reorder", input_info("permute2"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -76,7 +76,7 @@ TEST(prepare_buffer_fusing, static_node_after_optimized_out_dyn_reshape) { topology.add(fully_connected("fc", input_info("reshape"), "weights", "", {}, 2)); 
topology.add(reorder("reorder", input_info("fc"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); diff --git a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp index b4175a32ed778d..6a265633151451 100644 --- a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp @@ -35,7 +35,7 @@ TEST(prepare_primitive_fusing, fuse_activation_to_fc_dyn) { topology.add(activation("act", input_info("fc"), activation_func::relu)); topology.add(reorder("reorder", input_info("act"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -61,7 +61,7 @@ TEST(prepare_primitive_fusing, dont_fuse_incompatible_eltwise) { topology.add(eltwise("eltw", { input_info("input"), input_info("reduce") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -87,7 +87,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_legal) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -129,7 +129,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input")}, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -185,7 +185,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_const) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -239,7 +239,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_legal_scalar_const_broadca topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), 
format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -296,7 +296,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_1) { topology.add(activation("act_fc2", input_info("eltw"), activation_func::relu)); topology.add(reorder("reorder", input_info("act_fc2"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -365,7 +365,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_2) { topology.add(activation("act_fc3", input_info("eltw"), activation_func::relu)); topology.add(reorder("reorder", input_info("act_fc3"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -428,7 +428,7 @@ TEST(prepare_primitive_fusing, dont_remove_only_dep_reshape) { topology.add(reshape("reshape2", input_info("reshape1"), true, output_pattern, ov::PartialShape::dynamic(4))); topology.add(gemm("gemm", { input_info("reshape2"), input_info("input2") }, data_types::f32, false, false)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); diff --git a/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp index b2c62a5506b302..0d679659d6671d 100644 --- a/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp @@ -45,7 +45,7 @@ TEST(remove_redundant_reorders, remove_dep_dynamic) { topology.add(reorder("reorder", input_info("conv"), format::any, data_types::f32)); topology.add(softmax("softmax", input_info("reorder"), 1)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp index eeace41001b5f8..7619c70c730ca2 100644 --- a/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp @@ -42,7 +42,7 @@ TEST(reorder_inputs, propagation) { topology.add(pooling("pool", input_info("conv1"), pooling_mode::max, { 1, 1 }, { 1, 1 })); topology.add(convolution("conv2", input_info("pool"), { "weights" })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config); @@ -79,7 +79,7 @@ 
TEST(reorder_inputs, impl_forcing_basic_format) { ov::intel_gpu::ImplementationDesc pool_impl = { format::yxfb, "" }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"pool", pool_impl} })); network network(engine, topology, config); @@ -117,7 +117,7 @@ TEST(reorder_inputs, impl_forcing_not_existing) { ov::intel_gpu::ImplementationDesc pool_impl = { format::any, "NOT_EXISTING" }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"pool", pool_impl} })); ASSERT_ANY_THROW(network network(engine, topology, config)); @@ -133,7 +133,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { ov::intel_gpu::ImplementationDesc actv_impl = { format::yxfb, "activation_ref" }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"actv", actv_impl} })); network network(engine, topology, config); @@ -189,7 +189,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { // for (auto impl : possible_impls) { // SCOPED_TRACE(to_string(impl)); // -// ExecutionConfig config; +// ExecutionConfig config = get_test_default_config(engine); // config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", impl} })); // // network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp b/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp index 17ce61f257166e..90346e82de1e1e 100644 --- a/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp @@ -33,7 +33,7 @@ TEST(test_select_preferred_formats, setting_target_conv_format) { topology.add(reorder("reorder", input_info("input"), format::b_fs_yx_fsv16, data_types::f16)); topology.add(convolution("conv1", input_info("reorder"), { "weights" })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::onednn }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv1", impl} })); diff --git a/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp b/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp index 7f294eaea71de1..4de2436e999c4b 100644 --- a/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp +++ b/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp @@ -63,7 +63,7 @@ TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_onednn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, {1, 32, 2, 2}, data_types::f32, false)); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); 
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); @@ -100,7 +100,7 @@ TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_cldnn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, {1, 32, 2, 2}, data_types::f32, false)); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, false); @@ -172,7 +172,7 @@ TEST_P(test_fused_reorder_deep_depth, no_removal_for_deep_depth_conv) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" })); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); @@ -223,7 +223,13 @@ TEST_P(test_can_fuse_reorder_cldnn, reorder_for_firstconv_cldnn) topology.add(cldnn::convolution("conv2", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, p.out_shape, p.input_data_type, false)); topology.add(reorder("reorder_conv", input_info("conv2"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, false); @@ -269,7 +275,7 @@ TEST_P(test_can_fuse_reorder_onednn, reorder_for_firstconv_onednn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" })); topology.add(reorder("reorder_result", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); @@ -326,7 +332,12 @@ TEST_P(can_fuse_reorder, surface_input_reorder) { topology.add(input_layout_prim, weights_data_prim, surface_input_reorder_prim, conv_input_reorder_prim, conv_prim); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } program::ptr prog = program::build_program(engine, topology, cfg, false, true); 
layout_optimizer lo = layout_optimizer(); program_wrapper::apply_opt_pass(*prog, lo); @@ -384,7 +395,13 @@ TEST_P(can_fuse_reorder, surface_input_reorder_batched) { surface_input_reorder_prim1, surface_input_reorder_prim2, conv_input_reorder_prim, concat, conv_prim); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); program_wrapper::apply_opt_pass(*prog, lo); @@ -437,7 +454,7 @@ TEST_P(test_can_fuse_reorder_onednn_errata, errata_case_for_conv) { topology.add(convolution("conv", { input_info("reorder_conv") }, { "weights" })); topology.add(reorder("reorder_result", input_info("conv"), p.conv_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); diff --git a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp index bd41b1cf042de6..32f9e8e2f9c425 100644 --- a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp +++ b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp @@ -104,10 +104,9 @@ TEST_P(broadcast_test_two_inputs_blocked_format, shape_infer) { broadcast("output", input_info("data"), input_info("target_shape"), p.axes_mapping_data, p.mode) ); - ExecutionConfig config { - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::allow_new_shape_infer(true) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); std::vector input_data(p.data_layout.get_linear_size(), 1); diff --git a/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp index 0d444a85902695..ce292cfc5c2eb6 100644 --- a/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp @@ -36,7 +36,7 @@ TEST(activation_f32_fw_gpu, dynamic) { topology topology(input_layout("input", in_layout)); topology.add(activation("activation", input_info("input"), func)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -121,7 +121,7 @@ TEST(activation_f32_fw_gpu, not_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::negation)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -165,7 +165,7 @@ TEST(activation_f32_fw_gpu, erf_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", 
input_info("input"), activation_func::erf)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -211,7 +211,7 @@ TEST(activation_f32_fw_gpu, hard_sigmoid_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::hard_sigmoid, params)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -257,7 +257,7 @@ TEST(activation_f32_fw_gpu, reciprocal_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::reciprocal)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -304,7 +304,7 @@ TEST(activation_f32_fw_gpu, selu_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::selu, params)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -351,7 +351,7 @@ TEST(activation_f32_fw_gpu, softplus_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::softplus)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -397,7 +397,7 @@ TEST(activation_f32_fw_gpu, softsign_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::softsign)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -433,7 +433,7 @@ TEST(activation_f16_fw_gpu, softsign_basic_yxfb) { topology topology(input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::softsign)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -478,7 +478,7 @@ TEST(activation_f32_fw_gpu, sign_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::sign)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -516,7 +516,7 @@ TEST(activation_f32_fw_gpu, pow_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("pow", input_info("input"), activation_func::pow, { 2.0f, 0.0f })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -552,7 +552,7 @@ TEST(activation_f16_fw_gpu, pow_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("pow", input_info("input"), activation_func::pow, { FLOAT16(3.0f), FLOAT16(0.0f) })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -609,7 +609,7 @@ TEST(activation_f32_fw_gpu, relu_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -685,7 +685,7 @@ TEST(activation_f32_fw_gpu, relu_basic_bfzyx) { topology topology( input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -782,7 +782,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions) topology.add(activation("activation", input_info("input"), "input_params", func)); } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -932,7 +932,7 @@ TEST(activation_f16_fw_gpu, basic_bfyx_all_functions) topology.add(activation("activation", input_info("input"), "input_params", func)); } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1010,7 +1010,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log_atan) topology topology(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1096,7 +1096,7 @@ TEST(activation_f32_fw_gpu, relu_basic_acosh_yxfb) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), activation("relu", input_info("reorder"), activation_func::acosh, {0.5f, 0.f}, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.begin()->first, "relu"); @@ -1162,7 +1162,7 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_yxfb) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), activation("relu", input_info("reorder"), 
activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.begin()->first, "relu"); @@ -1249,7 +1249,7 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_bfzyx) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1, 0 }, 0 })), activation("relu", input_info("reorder"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.begin()->first, "relu"); @@ -1322,7 +1322,7 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 3, 3 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1365,7 +1365,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil) topology topology(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1429,7 +1429,7 @@ TEST(activation_i8_fw_gpu, basic_yxfb_all_funcs) topology.add(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1487,7 +1487,7 @@ TEST(activation_i32_fw_gpu, basic_yxfb_i32_funcs) { topology.add(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func, params)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1553,7 +1553,7 @@ TEST(activation_f32_fw_gpu, b_fs_yx_fsv16_prelu) { cldnn::reorder("out", input_info("actv"), cldnn::format::bfyx, cldnn::data_types::f32) ); - cldnn::network net(eng, topo); + cldnn::network net(eng, topo, get_test_default_config(eng)); set_values(in_mem, flatten_4d(format::bfyx, in_data)); net.set_input_data("in", in_mem); @@ -1693,7 +1693,8 @@ struct activation_random_test : testing::TestWithParam{"activation"})}; + ExecutionConfig config = get_test_default_config(engine, + ov::intel_gpu::custom_outputs(std::vector{"activation"})); cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test); @@ -1714,10 +1715,9 @@ struct activation_random_test : testing::TestWithParam{"activation_blocked", "res_to_input_format"}), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"activation_blocked", {input_format, "activation_ref"}}}) - }; + ExecutionConfig config_opt = 
get_test_default_config(engine, + {ov::intel_gpu::custom_outputs(std::vector{"activation_blocked", "res_to_input_format"}), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"activation_blocked", {input_format, "activation_ref"}}})}); network net_opt(engine, topo_opt, config_opt); diff --git a/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp index 57b627a76d5cbb..55e12ebc384e02 100644 --- a/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp @@ -135,7 +135,7 @@ struct adaptive_avg_pooling_test topology.add(adaptive_pooling("adaptive_avg_pooling_blocked", input_info("input_reordered"), params.outputTensor)); topology.add(reorder("adaptive_avg_pooling", input_info("adaptive_avg_pooling_blocked"), plain_layout, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp index 2c582dd4b57b66..faa9520af558dd 100644 --- a/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp @@ -162,7 +162,7 @@ struct adaptive_max_pooling_test result_id = reorder_result_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_data_id, input_mem); @@ -192,7 +192,7 @@ struct adaptive_max_pooling_test cldnn::topology reorder_topology; reorder_topology.add(input_layout("indices", indices_layout)); reorder_topology.add(reorder("plane_indices", input_info("indices"), plain_layout, data_types::i32)); - cldnn::network reorder_net{engine, reorder_topology}; + cldnn::network reorder_net{engine, reorder_topology, get_test_default_config(engine)}; reorder_net.set_input_data("indices", indices_mem); const auto second_output_result = reorder_net.execute(); const auto plane_indices_mem = second_output_result.at("plane_indices").get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp index bf962234217131..dee1ac04ec4705 100644 --- a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp @@ -24,7 +24,7 @@ add_reorders optimization pass. 
//concatenation of incompatible convolutions TEST(add_reorders_gpu, two_convolutions_and_concatenation) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(false)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); @@ -123,7 +123,7 @@ void test_add_reorders_gpu_basic_reshape_and_tile(bool is_caching_test) { set_values(input, input_vec); tile_ref(input, output_ref, 2, 4); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp index 5192ecf819b01b..752ab7270cbe92 100644 --- a/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp @@ -83,7 +83,7 @@ TYPED_TEST(argmax_gpu_test, base) { /*b1f3*/ 4.f, 0.5f, 8.f, 8.2f}; set_values(input, this->getTypedVector(input_vec)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -127,7 +127,7 @@ TEST(arg_max_gpu_min_axis_batch_bfzyx, i32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -194,7 +194,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -259,7 +259,7 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -316,7 +316,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -360,7 +360,7 @@ TEST(top_k_layer_tests, second_output) { /*b1f3*/ 4.f, 0.5f, 8.f, 8.2f}; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -454,7 +454,7 @@ TEST(top_k_layer_tests, second_output2) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -539,7 +539,7 @@ TEST(top_k_layer_tests, multiple_outputs) { set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); @@ -601,7 +601,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) { set_values(input, input_vec); - 
network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -658,7 +658,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -701,7 +701,7 @@ void test_top_k_layer_tests_sort_probabilities_by_indices(bool is_caching_test) set_values(input, input_vec); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -851,7 +851,7 @@ void test_top_k_layer_md_sync(bool is_caching_test) { true)); topology.add(mutable_data("arg_max.1", { input_info("arg_max.0") }, shared_memory)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input1", input1); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp b/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp index 73fc50647464bf..a739d6397216e4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp @@ -41,7 +41,13 @@ TEST(DISABLED_oooq_test, simple) { tpl.add(reorder("r8", input_info("c6"), concat_layout, std::vector{ 8 })); tpl.add(concatenation("c9", { input_info("r7"), input_info("r8") }, 2)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(*eng); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (eng->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + network net{ *eng, tpl, cfg }; net.set_input_data("in", input_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp index 3edf37b85ee59e..36eb362a7034bb 100644 --- a/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp @@ -38,7 +38,7 @@ TEST(batch_to_space_fp16_gpu, i8111_bs1222_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,2,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -85,7 +85,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,6,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -135,7 +135,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0010_ce0101) { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,1}, 0), tensor(format::bfyx, {1,5,1,1}, 1))); - network 
network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -182,7 +182,7 @@ TEST(batch_to_space_fp16_gpu, i62121_bs12311_cb02000_ce00110) { tensor(format::bfzyx, {0,2,0,0,0}, 0), tensor(format::bfzyx, {0,0,1,1,0}, 0), tensor(format::bfzyx, {1,2,2,1,1}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -231,7 +231,7 @@ TEST(batch_to_space_fp16_gpu, i1212112_bs112321_cb02000_ce00110) { tensor(format::bfwzyx, {0,0,1,0,0,0}, 0), tensor(format::bfwzyx, {0,0,0,2,0,0}, 0), tensor(format::bfwzyx, {1,1,3,1,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -281,7 +281,7 @@ TEST(batch_to_space_fp16_gpu, i21611_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { tensor(format::bfyx, {1,16,1,2}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -332,7 +332,7 @@ TEST(batch_to_space_fp16_gpu, i2812_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { tensor(format::bfyx, {1,6,1,4}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -377,7 +377,7 @@ TEST(batch_to_space_fp32_gpu, i8111_bs1222_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,2,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -424,7 +424,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,6,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -474,7 +474,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0010_ce0101) { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,1}, 0), tensor(format::bfyx, {1,5,1,1}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -521,7 +521,7 @@ TEST(batch_to_space_fp32_gpu, i62121_bs12311_cb02000_ce00110) { tensor(format::bfzyx, {0,2,0,0,0}, 0), tensor(format::bfzyx, {0,0,1,1,0}, 0), tensor(format::bfzyx, {1,2,2,1,1}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -570,7 +570,7 @@ TEST(batch_to_space_fp32_gpu, i1212112_bs112321_cb02000_ce00110) { tensor(format::bfwzyx, {0,0,1,0,0,0}, 0), tensor(format::bfwzyx, {0,0,0,2,0,0}, 0), tensor(format::bfwzyx, {1,1,3,1,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -624,7 +624,7 @@ TEST(batch_to_space_fp32_gpu, i21621_bs1112_cb0201_ce0810_b_fs_yx_fsv16) { tensor(format::bfyx, {1,6,1,1}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, 
data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -677,7 +677,7 @@ void test_batch_to_space_fp32_gpu_i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16(bool tensor(format::bfyx, {1,8,3,1}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp index 71e198f022e7b4..e4e6549db79022 100644 --- a/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp @@ -185,7 +185,7 @@ TEST_P(binary_convolution_test, conv) { if(engine.get_device_info().supports_immad) return; - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); topology topology_bin; @@ -382,7 +382,7 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) { padding{ { 0,0,0,0 }, 0 }) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -468,7 +468,7 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { padding{ { 0,0,0,0 }, 0 }) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp index 83a9bc19940be3..2298f1e83397e1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp @@ -87,7 +87,7 @@ class border_test : public ::testing::TestWithParam> { pad_mode, pad_value), reorder("output", input_info("border"), cldnn::format::bfyx, T_dt)); - cldnn::network::ptr target_network = get_network(engine, target_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr target_network = get_network(engine, target_topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); target_network->set_input_data("input", input); auto target_output = target_network->execute().at("output").get_memory(); cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); @@ -102,7 +102,7 @@ class border_test : public ::testing::TestWithParam> { pad_mode, pad_value)); - cldnn::network base_network(engine, base_topology); + cldnn::network base_network(engine, base_topology, get_test_default_config(engine)); base_network.set_input_data("input", input); auto base_output = base_network.execute().at("border").get_memory(); cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); @@ -233,7 +233,7 @@ TEST(border_gpu, bsv16fsv16_without_reorder) { ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, pad_value)); - 
cldnn::network target_network(engine, target_topology); + cldnn::network target_network(engine, target_topology, get_test_default_config(engine)); target_network.set_input_data("input", input_b16f16); auto target_output = target_network.execute().at("border").get_memory(); cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); @@ -247,7 +247,7 @@ TEST(border_gpu, bsv16fsv16_without_reorder) { ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, pad_value)); - cldnn::network base_network(engine, base_topology); + cldnn::network base_network(engine, base_topology, get_test_default_config(engine)); base_network.set_input_data("input", input); auto base_output = base_network.execute().at("border").get_memory(); cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); @@ -290,7 +290,7 @@ TEST(border_gpu, zyx_bsv16fsv16) { pad_mode, pad_value), reorder("output", input_info("border"), cldnn::format::bfzyx, T_dt)); - cldnn::network target_network(engine, target_topology); + cldnn::network target_network(engine, target_topology, get_test_default_config(engine)); target_network.set_input_data("input", input); auto target_output = target_network.execute().at("output").get_memory(); cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); @@ -304,7 +304,7 @@ TEST(border_gpu, zyx_bsv16fsv16) { ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, pad_value)); - cldnn::network base_network(engine, base_topology); + cldnn::network base_network(engine, base_topology, get_test_default_config(engine)); base_network.set_input_data("input", input); auto base_output = base_network.execute().at("border").get_memory(); cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); @@ -364,7 +364,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) { }; set_values(input, input_data); - cldnn::network network(engine, topology); + cldnn::network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -441,7 +441,7 @@ TEST(border_gpu, basic_fsv16_0x0x1x2_0x0x3x4_border_constant) { }; set_values(input, input_data); - cldnn::network network(engine, topology); + cldnn::network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -541,7 +541,7 @@ TEST(border_gpu, basic_bfzyx_0x0x1x01_0x0x0x0x3_border_constant) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -647,7 +647,7 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x1x0x1_0x0x0x1x0x1_border_constant) { }; set_values(input, input_data); - cldnn::network network(engine, topology); + cldnn::network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -725,7 +725,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -798,7 +798,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
auto outputs = network.execute(); @@ -862,7 +862,7 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -940,7 +940,7 @@ TEST(border_gpu, basic_bfzyxw_0x0x0x0x1_0x0x0x0x1_border_mirror) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1026,7 +1026,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1103,7 +1103,7 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror_101) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1197,7 +1197,7 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1276,7 +1276,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1336,7 +1336,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1401,7 +1401,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1464,7 +1464,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1527,7 +1527,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1594,7 +1594,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant_dynamic) { 
std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp index 0ed3d621325b36..ba447364c2c343 100644 --- a/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp @@ -64,7 +64,8 @@ void start_broadcast_test(format cldnn_format, data_types cldnn_data_type, std:: set_values(input, input_data); - network network(engine, topology); + ExecutionConfig cfg = get_test_default_config(engine); + network network(engine, topology, cfg); network.set_input_data("input", input); auto outputs = network.execute(); @@ -140,7 +141,7 @@ void start_broadcast_test_dynamic(format input_format, set_values(target_shape_mem, target_shape_data); } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); set_values(input, input_data); @@ -215,7 +216,7 @@ void start_broadcast_test_5d(format cldnn_format, data_types cldnn_data_type, st set_values(input, input_data); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp index bfaa6383fd6e5c..34ab112ab3da5c 100644 --- a/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp @@ -59,7 +59,7 @@ struct bucketize_test : testing::TestWithParam> { topology.add( reorder("plane_bucketize_left_bound", input_info("bucketize_left_bound"), format::bfyx, type_to_data_type::value)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("buckets", buckets); diff --git a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp index 68b90c33a13f55..c7f574371f3f25 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp @@ -127,7 +127,7 @@ void start_cl_mem_check_2_inputs(bool is_caching_test) { topology.add(input2); topology.add(reorder("reorder", input_info("input"), input_info("input2"), output_layout)); - cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(*engine, topology, get_test_default_config(*engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_memory); network->set_input_data("input2", input_memory2); @@ -249,7 +249,7 @@ TEST(cl_mem_check, check_input) { 
topology.add(input); topology.add(reorder("reorder", input_info("input"), output_layout)); - network network(*engine, topology); + network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp b/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp index 809aab070f9cf4..4d0b83b87fb231 100644 --- a/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp @@ -57,31 +57,55 @@ void exexute_network(cldnn::engine& engine, const ExecutionConfig& cfg, bool is_ } // namespace TEST(command_queue_test, test_priority_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_priority(ov::hint::Priority::LOW)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::LOW)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg); } TEST(command_queue_test, test_throttle_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::HIGH)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::HIGH)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg); } TEST(command_queue_test, test_priority_and_throttle_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), - ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg); } TEST(export_import_command_queue_test, test_priority_and_throttle_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), - ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg, true); } diff --git 
a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp index 62aa7d2fb82962..d3e0c90cd7ec06 100644 --- a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp @@ -59,7 +59,7 @@ TEST(concat_gpu, mixed_input_types) { padding{ { 0,0,0,0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input0", input0); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -326,7 +326,7 @@ TEST(concat_gpu, mixed_input_types_5d) { padding{ { 0,0,0,0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input0", input0); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -399,7 +399,7 @@ TEST(concat_gpu, i8_optimization_with_pool) { data_types::i8, padding{{0, 0, 0, 0}, 0}), reorder("reorder", input_info("concat"), reorder_layout)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input0", input0); @@ -501,7 +501,7 @@ TEST(concat_gpu, i8_optimization_with_conv) { data("weights", weights), convolution("conv", input_info("concat"), { "weights" }, { 2, 1 }), reorder("output", input_info("conv"), reorder_layout)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input0", input0); @@ -602,7 +602,7 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { data("weights", weights), convolution("conv", input_info("concat"), {"weights"}, {1, 1}, {0, 1}), reorder("output", input_info("conv"), reorder_layout) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input0", input0); @@ -775,7 +775,7 @@ struct concat_gpu_4d : public concat_gpu { topology.add(concatenation("concat", input_ids, 1)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -861,7 +861,7 @@ struct concat_gpu_4d_axis3 : public concat_axis3_gpu { topology.add(concatenation("concat", input_ids, 3)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -1025,7 +1025,7 @@ struct concat_id_conv_gpu_4d : public concat_gpu { topology.add(data("weights", weights_mem)); topology.add(convolution("conv", input_info("concat"), { "weights" })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto conv_forcing = ov::intel_gpu::ImplementationDesc{ fmt, std::string() }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {primitive_id("conv"), conv_forcing} })); @@ -1198,13 +1198,13 @@ struct 
concat_gpu_4d_implicit : public concat_gpu { auto input = generate_input(); // implicit concat - ExecutionConfig config1; + ExecutionConfig config1 = get_test_default_config(get_test_engine()); config1.set_property(ov::intel_gpu::optimize_data(true)); auto out_mem1 = run_concat_network(input, fmt, config1); cldnn::mem_lock out_ptr1(out_mem1, get_test_stream()); // explicit concat - ExecutionConfig config2; + ExecutionConfig config2 = get_test_default_config(get_test_engine()); config2.set_property(ov::intel_gpu::optimize_data(false)); auto out_mem2 = run_concat_network(input, fmt, config2); cldnn::mem_lock out_ptr2(out_mem2, get_test_stream()); @@ -1285,9 +1285,9 @@ TEST(concat_gpu_onednn, basic_input_types) { ov::intel_gpu::ImplementationDesc impl = { format::bfyx, std::string(""), impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::custom_outputs(std::vector{ "concat" }), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"concat", impl} })}; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "concat" })); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"concat", impl} })); network network(engine, topology, cfg); network.set_input_data("input0", input0); network.set_input_data("input1", input1); @@ -1425,19 +1425,17 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { auto input = generate_input(); // implicit concat - ExecutionConfig config1; + ExecutionConfig config1 = get_test_default_config(engine); config1.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc impl = { fmt, std::string(""), impl_types::onednn }; config1.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", impl} })); - config1.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem1 = run_concat_network(input, fmt, config1); cldnn::mem_lock out_ptr1(out_mem1, stream); // explicit concat - ExecutionConfig config2; + ExecutionConfig config2 = get_test_default_config(engine); config2.set_property(ov::intel_gpu::optimize_data(false)); - config2.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem2 = run_concat_network(input, fmt, config2); cldnn::mem_lock out_ptr2(out_mem2, stream); @@ -1594,19 +1592,17 @@ struct concat_gpu_4d_explict : public concat_gpu { auto input = generate_input(); // implicit concat when batch size is 1. 
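In the onednn concat fixtures in this file, the explicit config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)) calls are deleted without a replacement. That stays correct only if the shared default config already selects an in-order queue on onednn-capable devices, which matches the sketch earlier in this patch; under that assumption the migrated setup reduces to:

    // Assumed post-migration pattern for the onednn concat tests: start from
    // the shared default config and override only test-specific properties.
    ExecutionConfig config1 = get_test_default_config(engine);
    config1.set_property(ov::intel_gpu::optimize_data(true));
    // "impl" is the onednn ImplementationDesc built earlier in the fixture.
    config1.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{{"conv", impl}}));
    // No explicit queue_type(QueueTypes::in_order) is needed any more.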
- ExecutionConfig config1; + ExecutionConfig config1 = get_test_default_config(engine); config1.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc impl = { fmt, std::string(""), impl_types::onednn }; config1.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"conv", impl}})); - config1.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem1 = run_concat_network(input, fmt, config1); cldnn::mem_lock out_ptr1(out_mem1, stream); // explicit concat - ExecutionConfig config2; + ExecutionConfig config2 = get_test_default_config(engine); config2.set_property(ov::intel_gpu::optimize_data(false)); - config2.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem2 = run_concat_network(input, fmt, config2); cldnn::mem_lock out_ptr2(out_mem2, stream); diff --git a/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp index 1583987ce8bee2..aaa32a4dc3ea51 100644 --- a/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp @@ -87,7 +87,7 @@ std::pair, std::vector> get_values_to_compare(const cl TEST(DISABLED_condition_gpu, basic_equal_comp) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -139,7 +139,7 @@ TEST(DISABLED_condition_gpu, basic_equal_comp) { TEST(DISABLED_condition_gpu, basic_range_equal_comp) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); @@ -212,7 +212,7 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) { TEST(DISABLED_condition_gpu, generic_test_true_false) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 2, 5, 1 } }); std::vector input_data(50); @@ -321,7 +321,7 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) { */ auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -391,7 +391,7 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { */ auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -473,7 +473,7 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { auto& engine = 
get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } }); @@ -497,7 +497,7 @@ TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { TEST(DISABLED_condition_gpu, negative_too_big_offset) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } }); @@ -521,7 +521,7 @@ TEST(DISABLED_condition_gpu, negative_too_big_offset) { TEST(DISABLED_condition_gpu, negative_not_same_layouts) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -552,7 +552,7 @@ TEST(DISABLED_condition_gpu, negative_not_same_layouts) { TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); diff --git a/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp index 5c1dbb41576ccb..77cd01b1b492a4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp @@ -81,7 +81,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -120,7 +120,7 @@ TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -160,7 +160,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -200,7 +200,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -238,7 +238,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -274,7 +274,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_u8) { topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -350,7 +350,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) { topology.add(convert_color("convert_color", { input_info("input"), input_info("input2") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::image, output_layout)); - network network(*engine, topology); + network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); network.set_input_data("input2", input_memory2); @@ -414,7 +414,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) { topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::image, output_layout)); - network network(*engine, topology); + network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); auto outputs = network.execute(); @@ -507,7 +507,7 @@ TEST(convert_color, i420_to_rgb_three_planes_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_u"), input_info("input_v") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_u", input_u); network.set_input_data("input_v", input_v); @@ -593,7 +593,7 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test topology.add(convert_color("convert_color", { input_info("input"), input_info("input2"), input_info("input3") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::image, output_layout)); - 
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(*engine, topology, get_test_default_config(*engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_memory); network->set_input_data("input2", input_memory2); diff --git a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp index c8607728374de4..88b267e8796c1b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp @@ -339,7 +339,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_ { 1, 4, 4, 4 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("trans", trans); @@ -470,7 +470,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1) { 1, 4, 4, 4 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("trans", trans); @@ -633,7 +633,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) { { 1, 4, 4, 4 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("trans", trans); @@ -696,7 +696,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { data("weights", weights), convolution("conv", input_info("input"), { "weights" }, { 2, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -773,7 +773,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { convolution("conv", input_info("to_int"), { "weights" }, {2, 1 }), reorder("output", input_info("conv"), { data_types::f32, format::bfyx, { 1, 1, 3, 2 } })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -825,7 +825,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_no_bias) { data("weights", weights), convolution("conv", input_info("input"), { "weights" }, { 2, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -961,7 +961,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { data("biases", biases), convolution("conv", input_info("input"), { "weights" }, { "biases" }, {1, 1, 1}, {0, 0, 0}, {1, 1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1091,7 +1091,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { data("biases", biases), convolution("conv", input_info("input"), { "weights" }, { "biases" }, 2, {1, 1, 1}, {0, 0, 0}, {1, 1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1140,7 +1140,7 @@ TEST(convolution_f32_fw_gpu, 
with_output_size_same_input) { convolution::create_with_output_size("conv2", input_info("input"), { "weights2" }, { 1, 64, 320, 320 }, { 1, 1 }, { 3, 3 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1181,7 +1181,7 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) { convolution("conv3", input_info("conv2"), { "weights" }) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1252,7 +1252,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { data("biases", biases), convolution( "conv", input_info("input"), { "weights" }, { "biases" }, {2, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1310,7 +1310,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { input_layout("weights", weights->get_layout()), input_layout("biases", biases->get_layout()), convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 1 }, { 0, 0 })); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1371,7 +1371,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout_non_ input_layout("weights", weights->get_layout()), data("biases", biases), convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 1 }, { 0, 0 })); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(false)); network network(engine, topology, config, true); network.set_input_data("input", input); @@ -1464,7 +1464,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) { padding{ { 0, 0, 0, 0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1566,7 +1566,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { padding{ { 0, 0, 0, 0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1662,7 +1662,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { { 3, 2 }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1769,7 +1769,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_pad) { padding{ { 0, 0, 0, 0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1878,7 +1878,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_pad) { { 3, 2 }, padding{ { 0, 0, 0, 0 }, 0 })); - network 
network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1976,7 +1976,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) { padding{ { 0,0,x_pad,y_pad }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2077,7 +2077,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2147,7 +2147,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2205,7 +2205,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2259,7 +2259,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 } ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2311,7 +2311,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 5, 5 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2370,7 +2370,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 5, 5 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2426,7 +2426,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2482,7 +2482,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2547,7 +2547,7 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { padding{ { 0, 0, 1, 1 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2627,7 +2627,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2725,7 +2725,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2789,7 +2789,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2847,7 +2847,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2904,7 +2904,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2999,7 +2999,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3088,7 +3088,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3187,7 +3187,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3282,7 +3282,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3377,7 +3377,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) { 1, 1, 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3458,7 +3458,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { { 1, 1, 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3545,7 +3545,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no { 1, 1, 1, 1 }) ); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3624,7 +3624,7 @@ TEST(convolution_gpu, trivial_convolution_relu) { ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3702,7 +3702,7 @@ TEST(convolution_gpu, relu_with_negative_slope) { ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3751,7 +3751,7 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { conv_2 ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -3930,7 +3930,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4005,7 +4005,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { { 0, 0, 0 } } }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); set_values(input, { 1, 2, -3, 4, -5, @@ -4080,7 +4080,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }, {0, 0}, { 1, 1 }, tensor{ 1, 2, 3, 2 }), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4154,7 +4154,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4225,7 +4225,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4310,7 +4310,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4411,7 +4411,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, 
data_types::f32, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4482,7 +4482,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_p { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4680,7 +4680,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16) reorder("output", input_info("conv"), { data_types::f32, output_format, output_size }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -5033,7 +5033,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -5134,7 +5134,7 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -5341,7 +5341,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) } } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -5415,7 +5415,7 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { padding{ { 0, 0, output_padding, output_padding }, 0 }), reorder("output", input_info("conv"), { data_types::f32, format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); network network_ref(engine, topology_ref, config_ref); network_ref.set_input_data("input", input); @@ -5437,7 +5437,7 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { padding{ { 0, 0, output_padding, output_padding }, 0 }), reorder("output", input_info("conv"), { data_types::f32,format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); - ExecutionConfig config_act; + ExecutionConfig config_act = get_test_default_config(engine); config_act.set_property(ov::intel_gpu::optimize_data(true)); @@ -5577,7 +5577,7 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) } - ExecutionConfig config; + ExecutionConfig config = 
get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -5804,7 +5804,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) topology.add(reorder("reorder_bfzyx", input_info("conv_bsv16_fsv16"), format::bfzyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, config); @@ -5941,7 +5941,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) topology.add(reorder("reorder_bfzyx", input_info("conv_bsv16_fsv16"), format::bfzyx, data_types::f16)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, config); @@ -6077,7 +6077,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) topology.add(reorder("reorder_bfzyx", input_info("scale"), format::bfzyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, config); @@ -6238,7 +6238,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) topology.add(reorder("reorder_bfyx", input_info("conv_bsv16_fsv16"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; @@ -6378,7 +6378,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) topology.add(reorder("reorder_bfyx", input_info("conv_bsv16_fsv16"), format::bfyx, data_types::f16)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; @@ -6516,7 +6516,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) topology.add(reorder("reorder_bfyx", input_info("scale"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; @@ -6654,7 +6654,7 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; 
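// Note: every hunk in this series swaps a default-constructed ExecutionConfig
// (or the two-argument `network network(engine, topology);` constructor) for a
// config returned by the shared test helper, so per-device defaults are set in
// one place. The helper's real body lives in the tests' common utilities and is
// not shown in this patch; the sketch below is a hypothetical reading of what
// it does, inferred from the queue_type lines this patch deletes elsewhere.
ExecutionConfig get_test_default_config(cldnn::engine& engine) {
    ExecutionConfig config;
    // Assumption: onednn-capable devices (immad) want an in-order queue, which
    // would explain why the explicit
    // config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    // calls are removed from the onednn test cases later in this patch.
    if (engine.get_device_info().supports_immad)
        config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    return config;
}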
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -6797,7 +6797,7 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -6928,7 +6928,7 @@ TEST_P(convolution_depthwise_gpu_fsv16_xy, depthwise_conv_b_fs_yx_fsv16) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -7018,7 +7018,7 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa convolution("conv", input_info("input_reordered"), { "weights" }, { "bias" }, num_groups, stride, pad, dilation, output_size, data_types::f32, true), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); @@ -7131,7 +7131,7 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); @@ -7452,7 +7452,7 @@ TEST_P(convolution_grouped_gpu, base) { if (has_comp) topology.add(data(comp_prim_name[0], comp)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv", "out" })); ov::intel_gpu::ImplementationDesc conv_impl = { input_data_format, impl_name }; @@ -7613,7 +7613,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { conv_fsv.output_paddings = {padding({ 0, 0, output_padding, output_padding }, 0.f)}; topology.add(conv_fsv); } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { input_data_format, impl_name }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -7721,7 +7721,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) topology.add(reorder_bfyx); // format 8 to 3 -> after fusing, removed // Exec ref network (non-fusing) - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); config_ref.set_property(ov::intel_gpu::optimize_data(false)); config_ref.set_property(ov::intel_gpu::allow_static_input_reorder(true)); @@ -7733,7 +7733,7 
@@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) cldnn::mem_lock ref_out_ptr(ref_out_mem, get_test_stream()); // Exec target network (fusing: conv+reorder) - ExecutionConfig config_target; + ExecutionConfig config_target = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; config_target.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config_target.set_property(ov::intel_gpu::optimize_data(true)); @@ -7817,7 +7817,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) topology.add(reorder_bfyx); // format 8 to 3 -> after fusing, removed // Exec ref network (non-fusing) - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); config_ref.set_property(ov::intel_gpu::optimize_data(false)); config_ref.set_property(ov::intel_gpu::allow_static_input_reorder(true)); @@ -7829,7 +7829,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) cldnn::mem_lock ref_out_ptr(ref_out_mem, get_test_stream()); // Exec target network (fusing: conv+reorder) - ExecutionConfig config_target; + ExecutionConfig config_target = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; config_target.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config_target.set_property(ov::intel_gpu::optimize_data(true)); @@ -7935,10 +7935,10 @@ class convolution_test_base { auto topo = build_topology(engine); - ExecutionConfig config{ - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { input_format(), "" } } }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { input_format(), "" } } })); + auto prog = program::build_program(engine, topo, config); cldnn::network net(prog, 0); @@ -8295,10 +8295,10 @@ class convolution_random_test_fsv4_input : public convolution_random_test_base { auto topo = this->build_topology(engine); - ExecutionConfig config{ - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { this->input_format(), "" } } }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { this->input_format(), "" } } })); + auto prog = program::build_program(engine, topo, config); cldnn::network net(prog, 0); @@ -8691,7 +8691,7 @@ class convolution_test : public tests::generic_test { for (cldnn::data_types data_type : data_types) { for (cldnn::format input_format : input_formats) { for (cldnn::format weights_format : weights_formats) { - ExecutionConfig network_build_config; + ExecutionConfig network_build_config = get_test_default_config(get_test_engine()); if (input_format == cldnn::format::bfyx) { network_build_config.set_property(ov::intel_gpu::optimize_data(true)); } @@ -9066,9 +9066,8 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { topology.add(conv_fsv); } topology.add(reorder("reorder_bfyx", input_info("conv_fsv"), format::bfyx, data_types::f32)); - ExecutionConfig
config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{"conv_fsv","reorder_bfyx"})); network network(engine, topology, config); @@ -9134,19 +9133,17 @@ TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { topology topology_test(input, weights, input_reorder, conv1, conv2, output_reorder); topology topology_ref(input, weights, input_reorder, conv1, conv2, output_reorder); - ExecutionConfig config_test; + ExecutionConfig config_test = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv1_impl_test = { format::byxf, "", impl_types::onednn }; ov::intel_gpu::ImplementationDesc conv2_impl_test = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16", impl_types::ocl }; config_test.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl_test }, { "conv2", conv2_impl_test } })); config_test.set_property(ov::intel_gpu::optimize_data(true)); - config_test.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv1_impl_ref = { format::bfyx, "", impl_types::ocl }; ov::intel_gpu::ImplementationDesc conv2_impl_ref = { format::bfyx, "", impl_types::ocl }; config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl_ref }, { "conv2", conv2_impl_ref } })); config_ref.set_property(ov::intel_gpu::optimize_data(true)); - config_ref.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); network network_test(engine, topology_test, config_test); network network_ref(engine, topology_ref, config_ref); @@ -9228,11 +9225,10 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_asymmetric_act { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); network network(engine, topology, config); network.set_input_data("input", input); @@ -9319,11 +9315,10 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_asymmetric_act { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); network network(engine, topology, config); network.set_input_data("input", input); @@ -9421,7 +9416,7 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_bloxk_size_1( topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = 
get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -9502,7 +9497,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias_swap_xy) { data("weights", weights), convolution("conv", input_info("input"), { "weights" }, { 1, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp index d0492aa55b19e8..0d828c66506fd1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp @@ -57,7 +57,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -103,7 +103,7 @@ TEST(crop_gpu, basic_in2x2x2x3_crop_all) { input_vec.push_back(static_cast(i)); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -152,7 +152,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -200,7 +200,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -248,7 +248,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -297,7 +297,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -346,7 +346,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -395,7 +395,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_fyxb) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -442,7 +442,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_fyxb) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -489,7 +489,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_fyxb) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -553,7 +553,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) { -14.f, -15.f, -16.f, -17.f }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -618,7 +618,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_offsets) { -14, -15, -16, -17 }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -683,7 +683,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_offsets) { -14, -15, -16, -17 }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -759,7 +759,7 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { std::vector out1 = { -1.f, 2.f,-3.f }; std::vector out2 = { 4.f, }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -807,7 +807,7 @@ TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; std::vector out1 = { -1.f, 2.f,-3.f }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -875,7 +875,7 @@ TEST(crop_gpu, basic_i32_in1x4x1x1_split) { std::vector out1 = { -1, 2,-3 }; std::vector out2 = { 4, }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -950,7 +950,7 @@ TEST(crop_gpu, basic_i64_in1x4x1x1_split) { std::vector out1 = { -1, 2,-3 }; std::vector out2 = { 4, }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1028,10 +1028,10 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { std::vector out2 = { 4.f, }; set_values(input, input_vec); - ExecutionConfig cfg{ - ov::intel_gpu::enable_memory_pool(false), - ov::intel_gpu::optimize_data(true) - }; + ExecutionConfig cfg = get_test_default_config(*engine); + cfg.set_property(ov::intel_gpu::enable_memory_pool(false)); + cfg.set_property(ov::intel_gpu::optimize_data(true)); + network 
network(*engine, topology, cfg); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1081,7 +1081,7 @@ TEST(crop_gpu, basic_in3x1x2x2x1_crop_all_bfzyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1137,7 +1137,7 @@ TEST(crop_gpu, basic_in3x1x3x2x2x1_crop_all_bfwzyx) { VF input_vec = flatten_6d(format::bfwzyx, input_rnd); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1212,7 +1212,7 @@ TEST_P(crop_gpu, pad_test) { res.insert(res.end(), res_data.begin(), res_data.end()); } set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1289,7 +1289,7 @@ TEST(crop_gpu, dynamic_i32_in2x3x2x2_crop_offsets) { 4, -5, 8, 8, -14, -15, -16, -17 }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1348,7 +1348,7 @@ TEST(crop_gpu, dynamic_in1x4x1x1_split) { std::vector out1 = { -1.0f, 2.0f }; std::vector out2 = { -3.0f, 4.0f }; set_values(input_mem, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1417,7 +1417,7 @@ TEST(crop_gpu, dynamic_in1x4x1x1_varaidic_split) { set_values(axis_mem, {1}); set_values(splits_length_mem, splits_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1470,7 +1470,7 @@ TEST(crop_gpu, static_split_batch) { set_values(input_mem, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1525,7 +1525,7 @@ TEST(crop_gpu, optimized_out_crop) { topology.add(crop("crop2", { input_info("crop1") }, tensor(5, 4, 1, 1), { tensor(0, 0, 0, 0) }, padding({0, 0, 0, 0}, {0, 0, 0, 0}))); topology.add(reorder("reorder_out", input_info("crop2"), layout{ ov::PartialShape{5, 4, 1, 1}, data_types::f32, format::bfyx })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp index c63e5ec4e1e250..cc1b6b5999ae6d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp @@ -104,7 +104,7 @@ struct 
ctc_loss_gpu_test : public testing::TestWithParam { - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(std::get<0>(input), std::get<1>(input)); diff --git a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp index df45d0b8c802bf..1d6ef0d2c76b4e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp @@ -185,7 +185,7 @@ class cum_sum_gpu : public ::testing::TestWithParam { topology.add(input_layout("Input0", input->get_layout())); topology.add(cum_sum("cum_sum", input_info("Input0"), axis, exclusive, reverse)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input); @@ -282,7 +282,7 @@ TEST(cum_sum_gpu_f16, DISABLED_basic_1d) { topology.add(input_layout("Input0", input->get_layout())); topology.add(cum_sum("cum_sum", input_info("Input0"))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); @@ -317,7 +317,7 @@ TEST(cum_sum_gpu_fp32, dynamic) { topology.add(input_layout("input", in_layout)); topology.add(cum_sum("cum_sum", input_info("input"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp b/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp index de8634e02d6390..7661adc7737231 100644 --- a/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp @@ -82,7 +82,7 @@ TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -188,7 +188,7 @@ void add_basic_in2x2x2x2_with_reorder() 15.f, 17.f, 8.f, 10.f, -2.f, 6.f, 0.f, -2.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -288,7 +288,7 @@ TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -381,7 +381,7 @@ TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -483,7 +483,7 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2 4.f, -0.5f, 8.f, 8.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs =
network.execute(); @@ -547,7 +547,7 @@ void test_custom_gpu_primitive_u8_add_basic_in2x2x2x2(bool is_caching_test) { 2, 60, 0, 20 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); diff --git a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp index 4c1a6431488d60..a218640354ae95 100644 --- a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp @@ -175,7 +175,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -235,7 +235,7 @@ TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) { reorder("plane_output", input_info("deconv"), format::bfyx, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -297,7 +297,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Fil reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -359,7 +359,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_pad1) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -410,7 +410,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -475,7 +475,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_stride4_pad2) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -537,7 +537,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x2_stride2_pad1) { reorder("plane_output", input_info("deconv"), format::yxfb, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -584,7 +584,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) { // f1: 17 - 13 auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = 
get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); @@ -665,7 +665,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -726,7 +726,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_p deconvolution("deconv", input_info("reorder"), { "weights" }, { "biases" }, { 2, 2 }, { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -776,7 +776,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd // f1: 17 - 13 auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); @@ -858,7 +858,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -907,7 +907,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pa auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), @@ -996,7 +996,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2 reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1041,7 +1041,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2 reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1118,7 +1118,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group1 deconvolution("deconv", input_info("reordered_input"), { "weights" }, { "bias" }, 16, { 2, 2 }, { 1, 1 }), reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1208,7 +1208,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group1 deconvolution("deconv", 
input_info("reordered_input"), { "weights" }, { "bias" }, 16, { 2, 2 }, { 1, 1 }), reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1264,7 +1264,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2 reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1337,7 +1337,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz2x2x1_in1x1x2x2x1_nopad) { reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1489,7 +1489,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz3x3x3_in1x1x4x4x4_nopad) { reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1583,7 +1583,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_nopad) reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1656,7 +1656,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_pad1) { reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1719,7 +1719,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_k9x9_s2x2_pad4x4) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f16) ); - network network_ref(engine, topology_ref); + network network_ref(engine, topology_ref, get_test_default_config(engine)); network_ref.set_input_data("input", input); auto outputs_ref = network_ref.execute(); @@ -1739,7 +1739,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_k9x9_s2x2_pad4x4) { reorder("out", input_info("deconv_act"), format::bfyx, data_types::f16) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_act(engine, topology_act, config); network_act.set_input_data("input_act", input); @@ -1797,7 +1797,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -1868,7 +1868,7 
@@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f16) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -1917,7 +1917,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -1965,7 +1965,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2011,7 +2011,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_b_fs_yx_fsv16_dw) { reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2065,7 +2065,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1_b_fs_yx_fsv16_dw) { reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2107,7 +2107,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad_b_fs_yx_fsv reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2163,7 +2163,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; 
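// The deconvolution cases around this hunk all share one idiom: build a config
// from the test defaults, force the "deconv" primitive onto a blocked layout
// (an empty kernel name "" lets the plugin pick any kernel for that format),
// and compare against a defaults-only reference run. A condensed, hypothetical
// sketch of that idiom -- the helper name and the primitive ids
// "input"/"deconv"/"out" with fp32 output are assumptions taken from the
// surrounding hunks:
void check_forced_vs_default(cldnn::engine& engine, topology& topo, memory::ptr input) {
    // Reference run: test defaults only, no forcing.
    network net_ref(engine, topo, get_test_default_config(engine));
    net_ref.set_input_data("input", input);
    auto ref_mem = net_ref.execute().at("out").get_memory();
    // Forced run: require a b_fs_yx_fsv16 implementation of "deconv".
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{ { "deconv", { format::b_fs_yx_fsv16, "" } } }));
    network net_fsv(engine, topo, config);
    net_fsv.set_input_data("input", input);
    auto fsv_mem = net_fsv.execute().at("out").get_memory();
    // Both runs must agree element-wise.
    cldnn::mem_lock<float> ref_ptr(ref_mem, get_test_stream());
    cldnn::mem_lock<float> fsv_ptr(fsv_mem, get_test_stream());
    ASSERT_EQ(ref_ptr.size(), fsv_ptr.size());
    for (size_t i = 0; i < ref_ptr.size(); ++i)
        ASSERT_FLOAT_EQ(ref_ptr[i], fsv_ptr[i]);
}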
config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2219,7 +2219,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2304,7 +2304,7 @@ TEST(deconvolution_f32_fw_gpu, bs_fs_zyx_bsv16_fsv16_wsiz2x2x2_in1x1x2x2x2_strid reorder("out", input_info("deconv"), format::bfzyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::bs_fs_zyx_bsv16_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2357,7 +2357,7 @@ void test_deconvolution_f16_fw_gpu_basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2714,7 +2714,7 @@ class deconvolution_random_test : public testing::TestWithParam @@ -2971,8 +2971,9 @@ TEST(deconvolution_f32_fw_gpu_onednn, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { ov::intel_gpu::ImplementationDesc conv_impl = { format::yxfb, "", impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", conv_impl} })}; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", conv_impl} })); network network(engine, topology, cfg); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp index 71b1a4100995fc..01ca5a07dad41d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp @@ -65,7 +65,7 @@ TEST(depth_concatenate_f32_gpu, test01) { topology.add(input_layout("input2", input2->get_layout())); topology.add(concatenation("depth1", { input_info("input1"), input_info("input2") }, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -127,7 +127,7 @@ void concat_basic_with_reorder() { topology.add(concatenation("depth1", { input_info("to_int1"), input_info("to_int2") }, 1)); topology.add(reorder("to_float", input_info("depth1"), {data_types::f32, format::yxfb, {2, 5, 1, 1}})); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -203,7 +203,7 @@ TEST(depth_concatenate_f32_gpu, test02) { topology.add(input_layout("input3", input3->get_layout())); topology.add(concatenation("depth1", { input_info("input1"), input_info("input2"), input_info("input3") }, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -253,7 +253,7 @@ TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool) { topology.add(data("weights", weights)); topology.add(convolution("conv", input_info("concat1"), {"weights"})); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -288,7 +288,7 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) { topology.add(concatenation("depth3", { input_info("relu4"), input_info("depth2") }, 1)); topology.add(activation("relu5", input_info("depth3"), activation_func::relu)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -340,7 +340,7 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) { topology.add(concatenation("depth1", { input_info("input1"), input_info("input2") }, 1)); topology.add(activation("relu1", input_info("depth1"), activation_func::relu)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -394,7 +394,7 @@ TEST(depth_concatenate_f32_gpu, test05_different_formats) { topology.add(concatenation("depth1", { input_info("reshape1"), input_info("reshape2") }, 1)); topology.add(reorder("output", input_info("depth1"), format::bfyx, data_types::f32)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -453,7 +453,7 @@ TEST(depth_concatenate_f32_gpu, test06_padded_input) { topology.add(concatenation("depth2", { input_info("depth1"), input_info("conv") }, 1)); topology.add(reorder("output", input_info("depth2"), format::bfyx, data_types::f32)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", ov::intel_gpu::ImplementationDesc{format::fs_b_yx_fsv32, ""} } })); network network(engine, topology, config); @@ -529,7 +529,7 @@ TEST(depth_concatenate_f32_gpu, test07_padded_output) { topology.add(convolution("conv", input_info("depth1"), { "weights" }, {1, 1}, {1, 1})); topology.add(reorder("output", input_info("conv"), format::bfyx, data_types::f32)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", 
ov::intel_gpu::ImplementationDesc{format::fs_b_yx_fsv32, ""} } })); network network(engine, topology, config); @@ -589,7 +589,7 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { topology.add(activation("actv2", input_info("input2"), activation_func::linear, { 0.5f, 0.0f })); topology.add(concatenation("depth1", { input_info("actv1"), input_info("actv2") }, 1)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -620,7 +620,7 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); const int in1_f = 2, in2_f = 1; const int b = 2, x = 2, y = 4; auto input1 = engine.allocate_memory({ data_types::f32, format::yxfb,{ b, in1_f, y, x } }); @@ -704,7 +704,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2,4,1,2 } }); std::vector values = { @@ -742,7 +742,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { TEST(depth_concatenate_i32_gpu, optimize_data01) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 1, 1}}); topology topology; @@ -769,7 +769,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data01) { TEST(depth_concatenate_i32_gpu, optimize_data02) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); auto input2 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); auto input3 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); @@ -836,7 +836,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data02) { TEST(depth_concatenate_i32_gpu, optimize_data03) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -876,7 +876,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data03) { TEST(depth_concatenate_i32_gpu, optimize_data04) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -916,7 +916,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data04) { TEST(depth_concatenate_i32_gpu, optimize_data05) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -990,7 +990,7 @@ void test_depth_concatenate_f32_gpu_basic_bfwzyx_along_w(bool is_caching_test) { set_values(input1, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); 
config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1042,7 +1042,7 @@ static network::ptr setup_depth_concatatenate_network(const std::vectorset_input_data("Input0", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp index 41654f46b62ff1..d3af0d731c66d6 100644 --- a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp @@ -147,7 +147,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -182,7 +182,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output_1", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); topology.add(detection_output("detection_output_2", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -224,7 +224,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -272,7 +272,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -314,7 +314,7 @@ class detection_output_test : public ::testing::Test { 
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -367,7 +367,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -430,7 +430,7 @@ class detection_output_test : public ::testing::Test { prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id )); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -480,7 +480,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -541,7 +541,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -589,7 +589,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); 
- cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -640,7 +640,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -686,7 +686,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -749,7 +749,7 @@ class detection_output_test : public ::testing::Test { prior_is_normalized, this->img_size, this->img_size )); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); diff --git a/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp index 8ccb5509d4042f..e1a7f04658af8c 100644 --- a/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp @@ -118,7 +118,7 @@ struct dft_gpu_test : public testing::TestWithParam { // It's simpler to use "bfwzyx" format for all cases, as input and output can have different ranks topology.add(reorder("out", input_info("dft"), format::bfwzyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto outputs = network->execute(); @@ -2054,7 +2054,7 @@ TEST(dft_gpu_test, irdft_output_shape) { topology.add(dft("dft", input_info("reorder_input"), p.axes, p.signal_size, p.output_shape, type.direction, type.mode)); { - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); 
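The hunks above and below all follow the same recipe: build an ExecutionConfig from the shared get_test_default_config(engine) helper, layer any test-specific properties on top, and hand the result either to the network constructor directly or to the get_network() helper when the test also exercises model caching. A condensed sketch of that recipe is shown here; it assumes a topology containing a primitive named "deconv", an engine obtained from get_test_engine(), and an is_caching_test flag supplied by the test, and the bodies of the helpers themselves are not part of this patch:

    auto& engine = get_test_engine();
    ExecutionConfig config = get_test_default_config(engine);

    // Test-specific properties are layered on top of the shared defaults,
    // e.g. enabling graph optimizations and pinning "deconv" to a blocked
    // layout (an empty kernel name lets the plugin pick the kernel).
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{ {"deconv", ov::intel_gpu::ImplementationDesc{ format::b_fs_yx_fsv16, "" }} }));

    // Caching tests route through get_network(), which decides based on
    // is_caching_test whether to round-trip the compiled network through
    // the cache before returning it.
    cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
    network->set_input_data("input", input);
    const auto outputs = network->execute();

The same layering appears with other properties in this patch, such as ov::intel_gpu::allow_new_shape_infer(true) for the dynamic-shape eltwise tests and ov::intel_gpu::queue_type(QueueTypes::in_order) for the onednn deconvolution test.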
@@ -2069,7 +2069,7 @@ TEST(dft_gpu_test, irdft_output_shape) { topology.add(reorder("out", input_info("dft"), format::bfwzyx, data_type)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp index 4bbe8ae9413c96..e3e9d06c3ce1a7 100644 --- a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp @@ -102,7 +102,7 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f topology.add(activation("out", out_id, activation_func::relu, { slope, 0.0f })); out_id = "out"; } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -240,7 +240,7 @@ void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int in topology.add(reorder("reorder1", input_info("input1"), input1->get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 }))); topology.add(eltwise("eltwise", { input_info("reorder1"), input_info("input2") }, mode, DEFAULT_BROADCAST_SPEC, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -343,7 +343,7 @@ TEST(eltwise_gpu_f32, equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::eq)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -413,7 +413,7 @@ TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::ne)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -483,7 +483,7 @@ TEST(eltwise_gpu_f32, less_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::lt)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -553,7 +553,7 @@ TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::le)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -623,7 +623,7 @@ TEST(eltwise_gpu_f32, greater_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); 
topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::gt)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -693,7 +693,7 @@ TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::ge)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -763,7 +763,7 @@ TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::logic_and)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -849,7 +849,7 @@ TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) { topology.add(input_layout("input3", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2"), input_info("input3") }, eltwise_mode::logic_and)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -920,7 +920,7 @@ TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::logic_or)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -1006,7 +1006,7 @@ TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) { topology.add(input_layout("input3", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2"), input_info("input3") }, eltwise_mode::logic_or)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -1077,7 +1077,7 @@ TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::logic_xor)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -1128,7 +1128,7 @@ TEST(eltwise_gpu_f32, isfinite_in1_float_out1_int) { topology.add(input_layout("input", input->get_layout())); topology.add(eltwise("eltwise", {input_info("input")}, eltwise_mode::is_finite)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); @@ -1183,7 +1183,7 @@ TEST(eltwise_gpu_f32, isinf_in1_float_out1_int) { topology.add(input_layout("input", input->get_layout())); topology.add(eltwise("eltwise", {input_info("input")}, eltwise_mode::is_inf, coefficients, 
data_types::i8)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); @@ -1227,7 +1227,7 @@ TEST(eltwise_gpu_f32, isnan_in1_float_out1_int) { topology.add(input_layout("input", input->get_layout())); topology.add(eltwise("eltwise", {input_info("input")}, eltwise_mode::is_nan)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); @@ -1270,7 +1270,7 @@ TEST(eltwise_gpu_f32, dynamic_kernel_no_broadcast) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -1326,7 +1326,7 @@ TEST(eltwise_gpu_f32, dynamic_kernel_broadcast) { set_values(input2, { 0.5f, -0.5f }); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -1403,7 +1403,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1459,7 +1459,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) { -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1528,7 +1528,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) { -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1591,7 +1591,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) { 4.f, -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1656,7 +1656,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) { 4.f, -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1715,7 +1715,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) { 1.f, 2.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1772,7 +1772,7 @@ TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) { set_values(input2, { 2.0f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1849,7 +1849,7 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) { -4.f, 0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1925,7 +1925,7 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) { -4.f, 0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2003,7 +2003,7 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) { 15.f, 17.f, 8.f, 10.f, 6.f, 8.f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2074,7 +2074,7 @@ TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) { 15.f, 17.f, 8.f, 8.5f, 6.f, 8.f, -0.5f, 10.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2145,7 +2145,7 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) { 6.f, 8.f, 0.f, 10.f }; set_values(input2, input_2_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -2223,7 +2223,7 @@ TEST(eltwise_gpu_f32_int, basic_in4x4x4x4) { 6.f, 8.f, 0.f, 10.f }; set_values(input2, input_2_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -2304,7 +2304,7 @@ TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) { 2.5f, 7.f, 17.f, 8.f, 2.5f, 4.f, 10.f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2378,7 +2378,7 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) { 15.f, 17.f, 8.f, 10.f, 6.f, 8.f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2450,7 +2450,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2566,7 +2566,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) { 6.f, 0.f, 2.f, 0.f, 5.f, 1.f, 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2656,7 +2656,7 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) { 15.f, 3.f, 9.f, 1.f, -1.f, 6.f, 0.5f, 8.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2746,7 +2746,7 @@ TEST(eltwise_gpu_f32, stride_test_2x2) { 15, 31, 47, 63, 16, 32, 48, 64 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", 
input2); @@ -2814,7 +2814,7 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2) { 0.5f, 2.5f, 0.5f, 2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2882,7 +2882,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic) golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum)); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -2899,7 +2899,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic) FSV32_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); FSV32_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network FSV32_network(engine, FSV32_topology); + network FSV32_network(engine, FSV32_topology, get_test_default_config(engine)); FSV32_network.set_input_data("input1", input1); FSV32_network.set_input_data("input2", input2); @@ -2949,7 +2949,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast) ref_topology.add(input_layout("input2", input2->get_layout())); ref_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::prod)); - network ref_network(engine, ref_topology); + network ref_network(engine, ref_topology, get_test_default_config(engine)); ref_network.set_input_data("input1", input1); ref_network.set_input_data("input2", input2); @@ -2965,7 +2965,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast) fsv32_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::prod)); fsv32_topology.add(reorder("reorder_bfyx", input_info("eltwise"), layout(data_types::f16, format::bfyx, input1_tensor))); - network fsv32_network(engine, fsv32_topology); + network fsv32_network(engine, fsv32_topology, get_test_default_config(engine)); fsv32_network.set_input_data("input1", input1); fsv32_network.set_input_data("input2", input2); @@ -3013,7 +3013,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast_bfyx) ref_topology.add(input_layout("input2", input2->get_layout())); ref_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::prod)); - network ref_network(engine, ref_topology); + network ref_network(engine, ref_topology, get_test_default_config(engine)); ref_network.set_input_data("input1", input1); ref_network.set_input_data("input2", input2); @@ -3028,7 +3028,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast_bfyx) fsv32_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("input2"), eltwise_mode::prod)); fsv32_topology.add(reorder("reorder_bfyx", input_info("eltwise"), layout(data_types::f16, format::bfyx, input1_tensor))); - network fsv32_network(engine, fsv32_topology); + network fsv32_network(engine, fsv32_topology, get_test_default_config(engine)); fsv32_network.set_input_data("input1", input1); fsv32_network.set_input_data("input2", input2); @@ -3067,7 +3067,7 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2x2) { set_values(input2, { 0.5f, 2.5f, 0.5f, 2.5f, 1.f, 2.f, 3.f, 4.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
network.set_input_data("input2", input2); @@ -3124,7 +3124,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum)); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -3141,7 +3141,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology); + network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology, get_test_default_config(engine)); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input1", input1); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input2", input2); @@ -3158,7 +3158,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) BYXF_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); BYXF_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology); + network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology, get_test_default_config(engine)); BYXF_OUTPUT_network.set_input_data("input1", input1); BYXF_OUTPUT_network.set_input_data("input2", input2); @@ -3204,7 +3204,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) { golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum, DEFAULT_BROADCAST_SPEC, padding{ {0,0,5,10} , 0 })); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -3222,7 +3222,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) { FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,5,10} , 0 }))); - network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology); + network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology, get_test_default_config(engine)); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input1", input1); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input2", input2); @@ -3240,7 +3240,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) { BYXF_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,5,10} , 0 }))); - network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology); + network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology, get_test_default_config(engine)); BYXF_OUTPUT_network.set_input_data("input1", input1); BYXF_OUTPUT_network.set_input_data("input2", input2); @@ -3289,7 +3289,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) golden_topology.add(reorder("reorder2", input_info("input2"), layout(data_types::f16, format::bfyx, input_tensor, 
padding{ {0,0,5,7},0.0f }))); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum)); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -3306,7 +3306,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology); + network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology, get_test_default_config(engine)); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input1", input1); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input2", input2); @@ -3323,7 +3323,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) BYXF_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); BYXF_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology); + network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology, get_test_default_config(engine)); BYXF_OUTPUT_network.set_input_data("input1", input1); BYXF_OUTPUT_network.set_input_data("input2", input2); @@ -3455,7 +3455,7 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { eltw, actv); // Network processing - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); network.set_input_data("input3", input3); @@ -3511,7 +3511,7 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { { in_B, in_F, in_X, in_Y }))); // Network processing - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); network.set_input_data("input3", input3); @@ -3671,7 +3671,7 @@ struct eltwise_same_input_test : testing::TestWithParam{"eltwise"})); cldnn::network net(engine, topo, config); @@ -3835,7 +3835,7 @@ TEST_P(eltwise_test, fsv16) { topology.add(reorder("out", input_info("eltwise"), fmt_pln, data_types::f32)); primitive_id out_id = "out"; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -3941,7 +3941,7 @@ TEST_P(eltwise_test_6d, bfwzyx) { topology.add(reorder("out", input_info("eltwise"), format::bfwzyx, data_types::f32)); primitive_id out_id = "out"; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -4026,7 +4026,7 @@ TEST_P(eltwise_test_mixed_precision, fsv16) { topology.add(reorder("out", input_info("eltwise"), fmt_pln, data_types::f32)); primitive_id out_id = "out"; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -4131,7 +4131,7 @@ TEST_P(eltwise_test_mixed_layout, 
mixed_layout) { topology.add(reorder("out", input_info("eltwise"), format::bfyx, data_types::f32)); primitive_id out_id = "out"; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -4278,7 +4278,7 @@ struct eltwise_random_test : testing::TestWithParam auto prim = eltwise("eltwise", { input_info("input1"), input_info("input2") }, params.mode); topo.add(prim); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{"eltwise"})); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"eltwise", {params.in_format, "generic_eltwise_ref"}} })); @@ -4295,7 +4295,7 @@ struct eltwise_random_test : testing::TestWithParam auto prim_opt = eltwise("eltwise_opt", { input_info("input1"), input_info("input2") }, params.mode); topo_opt.add(prim_opt); - ExecutionConfig config_opt; + ExecutionConfig config_opt = get_test_default_config(engine); config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector{"eltwise_opt"})); cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp index a90f9623973582..9220efbdd1bc3b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp @@ -53,7 +53,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -106,7 +106,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic_without_weights) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -187,7 +187,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim2) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -310,7 +310,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -404,7 +404,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic) { topology.add( embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", 
emb_table); network.set_input_data("Input1", indices); @@ -469,7 +469,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_first_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -534,7 +534,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_last_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -592,7 +592,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_without_weights_and_def_index) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -707,7 +707,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -803,7 +803,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -868,7 +868,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -933,7 +933,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -991,7 +991,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1106,7 +1106,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), 
input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1199,7 +1199,7 @@ TEST(embedding_bag_fp32_gpu, packed_sum_basic) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1309,7 +1309,7 @@ TEST(embedding_bag_fp32_gpu, packed_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1394,7 +1394,7 @@ void test_embedding_bag_fp32_gpu_extended5_6(bool is_caching_test) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", emb_table); network->set_input_data("Input1", indices); diff --git a/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp index 7f7b0370c1daa0..ee39a7b6cd4bc4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp @@ -44,7 +44,7 @@ TEST_P(test_empty_tensor, concat_two_inputs) { topology.add(gather_nonzero("gather_nonzero", input_info("nonzero_input"), input_info("count_nonzero"))); topology.add(concatenation("concat", { input_info("gather_nonzero"), input_info("concat_data") }, p.concat_axis)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp index 660e3bdf34202f..46c13f3ac9a8f9 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp @@ -143,7 +143,7 @@ struct experimental_detectron_detection_output_test const primitive_id eddo_id = "experimental_detectron_detection_output"; topology.add(reorder(eddo_id, input_info(b_eddo_primitive) /*b_eddo_id*/, format::bfyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_boxes_id, input_boxes); network->set_input_data(input_deltas_id, input_deltas); @@ 
-159,7 +159,7 @@ struct experimental_detectron_detection_output_test cldnn::topology reorder_score_topology; reorder_score_topology.add(input_layout(b_output_scores_id, output_scores_layout)); reorder_score_topology.add(reorder(output_scores_id, input_info(b_output_scores_id), format::bfyx, data_type)); - cldnn::network reorder_score_net{engine, reorder_score_topology}; + cldnn::network reorder_score_net{engine, reorder_score_topology, get_test_default_config(engine)}; reorder_score_net.set_input_data(b_output_scores_id, b_output_scores); const auto score_result = reorder_score_net.execute(); const auto output_scores = score_result.at(output_scores_id).get_memory(); @@ -170,7 +170,7 @@ struct experimental_detectron_detection_output_test cldnn::topology reorder_classes_topology; reorder_classes_topology.add(input_layout(b_output_classes_id, output_classes_layout)); reorder_classes_topology.add(reorder(output_classes_id, input_info(b_output_classes_id), format::bfyx, data_types::i32)); - cldnn::network reorder_classes_net{engine, reorder_classes_topology}; + cldnn::network reorder_classes_net{engine, reorder_classes_topology, get_test_default_config(engine)}; reorder_classes_net.set_input_data(b_output_classes_id, b_output_classes); const auto classes_result = reorder_classes_net.execute(); const auto output_classes = classes_result.at(output_classes_id).get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp index 21b1ce49594976..7a9470b3ff36e0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp @@ -241,7 +241,7 @@ struct experimental_detectron_generate_proposals_single_image_test const primitive_id reorder_result_id = edgpsi_id + "Reordered"; topology.add(reorder(reorder_result_id, input_info(edgpsi_primitive), format::bfyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_im_info_id, input_im_info); network->set_input_data(input_anchors_id, input_anchors); @@ -258,7 +258,7 @@ struct experimental_detectron_generate_proposals_single_image_test cldnn::topology reorder_topology; reorder_topology.add(input_layout("scores", rois_scores_layout)); reorder_topology.add(reorder("plane_scores", input_info("scores"), format::bfyx, data_type)); - cldnn::network reorder_net{engine, reorder_topology}; + cldnn::network reorder_net{engine, reorder_topology, get_test_default_config(engine)}; reorder_net.set_input_data("scores", output_roi_scores); const auto second_output_result = reorder_net.execute(); const auto plane_data_mem = second_output_result.at("plane_scores").get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp index 37a37e7bc5d796..3a52028e650139 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp +++ 
b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp @@ -62,7 +62,7 @@ struct experimental_detectron_prior_grid_generator_test params.imageShape.first, params.imageShape.second)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), params.is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), params.is_caching_test); network->set_input_data(priors_id, prior_input); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp index 19f2f56d8490bc..85d45ad22ae615 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp @@ -53,7 +53,7 @@ void test_experimental_detectron_roi_feature_extractor_gpu_fp32_one_level(bool i topology.add(activation(activation_abs_id, feature_extractor_id, activation_func::abs)); topology.add(mutable_data(second_output_r_id, {feature_extractor_id}, second_output)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_rois_id, roi_input); network->set_input_data(input_level_1_id, level_1); @@ -150,7 +150,7 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, two_levels) { topology.add(activation(activation_abs_id, feature_extractor_id, activation_func::abs)); topology.add(mutable_data(second_output_r_id, {feature_extractor_id}, second_output)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_id, roi_input); network.set_input_data(input_level_1_id, level_1); @@ -246,7 +246,7 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, multiple_feature_ext topology.add(activation(activation_abs_second_instance_id, input_info(feature_extractor_second_instance_id), activation_func::abs)); topology.add(mutable_data(second_output_r_second_instance_id, { input_info(feature_extractor_second_instance_id) }, second_output_second_instance)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_first_instance_id, roi_input_first_instance); network.set_input_data(input_rois_second_instance_id, roi_input_second_instance); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp index ed809b82b013de..2dd3ce2b417bcb 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp @@ -80,7 +80,7 @@ TYPED_TEST(experimental_detectron_topk_rois_gpu_test, check_set_indices_layer) { rois_num)); topology.add(reorder("plane_output", experimental_detectron_topk_rois_id, format::bfyx, this->data_type)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_id, 
roi_input); network.set_input_data(input_indices_id, roi_indices); @@ -118,7 +118,7 @@ TYPED_TEST(experimental_detectron_topk_rois_gpu_test, check_set_indices_layer_mo rois_num)); topology.add(reorder("plane_output", input_info(experimental_detectron_topk_rois_id), format::bfyx, this->data_type)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_id, roi_input); network.set_input_data(input_indices_id, roi_indices); @@ -159,7 +159,7 @@ TEST(experimental_detectron_topk_rois_gpu_test, export_import) { rois_num)); topology.add(reorder("plane_output", input_info(experimental_detectron_topk_rois_id), format::bfyx, test_data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), true); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), true); network->set_input_data(input_rois_id, roi_input); network->set_input_data(input_indices_id, roi_indices); diff --git a/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp index acee53d74a4e22..9a6e022efc0697 100644 --- a/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp @@ -41,7 +41,7 @@ TEST(extract_image_patches_gpu, basic) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -115,7 +115,7 @@ TEST(extract_image_patches_gpu, basic2) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -179,7 +179,7 @@ TEST(extract_image_patches_gpu, basic3) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -274,7 +274,7 @@ TEST(extract_image_patches_gpu, basic3_same_lower) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -369,7 +369,7 @@ TEST(extract_image_patches_gpu, basic3_enough_space) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
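All of these hunks apply one recipe: wherever a test default-constructed an ExecutionConfig (or passed none at all), it now asks the shared test utility for an engine-aware default. The helper's real definition is not part of this patch; the sketch below is only a plausible reading of what it centralizes, inferred from the per-test queue_type configs this patch deletes further down (see the GemmBaseTest hunk):

    // Hypothetical sketch, not the actual test_utils implementation.
    inline ExecutionConfig get_test_default_config_sketch(cldnn::engine& engine) {
        ExecutionConfig cfg;
    #ifdef ENABLE_ONEDNN_FOR_GPU
        // Assumption: oneDNN builds need an in-order queue, which individual
        // tests used to request by hand (see the removed lines in GemmBaseTest).
        cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    #endif
        (void)engine;  // the real helper may also consult engine properties
        return cfg;
    }

Construction sites then shrink to the one-line form repeated through this file: network network(engine, topology, get_test_default_config(engine));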
network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -443,7 +443,7 @@ TEST(extract_image_patches_gpu, basic4) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -518,7 +518,7 @@ void test_extract_image_patches_gpu_basic5(bool is_caching_test) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/eye.cpp b/src/plugins/intel_gpu/tests/test_cases/eye.cpp index ad316f4fb28d31..22ee147e12c747 100644 --- a/src/plugins/intel_gpu/tests/test_cases/eye.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/eye.cpp @@ -85,7 +85,7 @@ class EyeTest : public ::testing::TestWithParam::value)); } - cldnn::network::ptr network = get_network(engine_, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine_, tp, get_test_default_config(engine_), get_test_stream_ptr(), is_caching_test); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp index 588b9a75fc8eff..e3722341a997c3 100644 --- a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp @@ -88,7 +88,7 @@ void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format te topology.add(activation("out", input_info(out_id), activation_func::relu, { slope, 0.0f })); out_id = "out"; } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -209,7 +209,7 @@ TEST(fully_connected_gpu, no_biases) { topology.add(w_data); topology.add(fc); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -269,7 +269,7 @@ TEST(fully_connected_gpu, no_biases_int8) { topology.add(fc); topology.add(ri); topology.add(rf); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -328,7 +328,7 @@ TEST(fully_connected_gpu, xb_f32_batch_1) { fully_connected("fc_prim", input_info("input"), "weights", "bias") ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -388,7 +388,7 @@ TEST(fully_connected_gpu, xb_f32_batch_2) { fully_connected("fc_prim", input_info("input"), "weights", "bias") ); - network network(engine, 
topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -450,7 +450,7 @@ TEST(fully_connected_gpu, x_f32) { fully_connected("fc_prim", input_info("input"), "weights", "bias") ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -511,7 +511,7 @@ TEST(fully_connected_gpu, xb_f32_batch_1_relu) { activation("out", input_info("fc_prim"), activation_func::relu) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -573,7 +573,7 @@ TEST(fully_connected_gpu, xb_f32_batch_2_relu) { activation("out", input_info("fc_prim"), activation_func::relu) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -636,7 +636,7 @@ TEST(fully_connected_gpu, x_f32_relu) { activation("out", input_info("fc_prim"), activation_func::relu) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -696,7 +696,7 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) { activation("out", input_info("fc_prim"), activation_func::relu_negative_slope, { 0.1f }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -799,7 +799,7 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4) topology.add(reorder_gold, reorder_imad); // Network build - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -868,7 +868,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) { ); // Set data optimization to allow weights reordering to optimal format - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -944,7 +944,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34) ); // Set data optimization to allow weights reordering to optimal format - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -1006,7 +1006,7 @@ struct fully_connected_random_test : ::testing::TestWithParam("bias", format::bfyx, std::move(bias_data)); auto fc = net.add_fully_connected("fc_prim", input, weights, bias, ov::intel_gpu::ImplementationDesc{ output_format, kernel }); - net.run(ExecutionConfig(ov::intel_gpu::optimize_data(true)), is_caching_test); + net.run(get_test_default_config(eng, ov::intel_gpu::optimize_data(true)), is_caching_test); } }; @@ -1129,7 +1129,9 @@ struct fully_connected_random_test_3d : ::testing::TestWithParam("bias", format::bfyx, std::move(bias_data)); auto fc = net.add_fully_connected_3d("fc_prim", input, weights, bias, ov::intel_gpu::ImplementationDesc{ output_format, kernel }, 3); - net.run(ExecutionConfig(ov::intel_gpu::optimize_data(true)), is_caching_test); + 
ExecutionConfig config = get_test_default_config(eng); + config.set_property(ov::intel_gpu::optimize_data(true)); + net.run(config, is_caching_test); } }; @@ -1393,7 +1395,7 @@ class fully_connected_quantized_test : public ::testing::Test { topo.add(reorder("output", input_info("quantization_prim"), format::bfyx, output_data_type())); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, topo, config); @@ -1686,9 +1688,8 @@ TEST(fully_connected_onednn_gpu, no_biases_int8) { ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} }) - }; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} })); network network(engine, topology, cfg); network.set_input_data("input", input_prim); @@ -1738,7 +1739,8 @@ TEST(fully_connected_3d_onednn_gpu, no_biases_int8) { topology.add(rf); ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim", fc_impl } })}; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} })); network network(engine, topology, cfg); network.set_input_data("input", input_prim); @@ -1778,7 +1780,7 @@ TEST(fully_connected_gpu, dynamic) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1828,7 +1830,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1908,7 +1910,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1998,7 +2000,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -2133,7 +2135,7 @@ struct dynamic_fully_connected_gpu : ::testing::TestWithParamget_layout().format, output_tensor, axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = 
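Tests that need more than the defaults layer properties on top of them rather than starting from scratch. The patch uses two spellings of this, both visible in the fully_connected hunks above: the expanded set_property form, and a one-call overload of the helper that accepts the extra property directly:

    // Expanded form (fully_connected_random_test_3d above):
    ExecutionConfig config = get_test_default_config(eng);
    config.set_property(ov::intel_gpu::optimize_data(true));
    net.run(config, is_caching_test);

    // One-call form (fully_connected_random_test above):
    net.run(get_test_default_config(eng, ov::intel_gpu::optimize_data(true)), is_caching_test);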
get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input0); network->set_input_data("InputIndices", input1); @@ -1294,7 +1294,7 @@ TEST(gather_elements_gpu, dynamic) { topology.add(input_layout("InputIndices", in1_dyn_layout)); topology.add(gather_elements("gather_elements", input_info("InputData"), input_info("InputIndices"), axis)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp index 9f421837ca46ae..421802e60aa9de 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp @@ -90,7 +90,7 @@ class gather8_test : public ::testing::TestWithParam { batch_dim, true)); reorder_topo.add(reorder("reorder2", input_info("gather"), format::type::bfwzyx, T_dat_dt)); - network reorder_network(engine, reorder_topo); + network reorder_network(engine, reorder_topo, get_test_default_config(engine)); reorder_network.set_input_data("input0", input0); reorder_network.set_input_data("input1", input1); auto reorder_output = reorder_network.execute().at("reorder2").get_memory(); @@ -101,7 +101,7 @@ class gather8_test : public ::testing::TestWithParam { planar_topo.add(input_layout("input1", input1->get_layout())); planar_topo.add( gather("gather", input_info("input0"), input_info("input1"), axis, ov::Shape(shape_out.begin(), shape_out.end()), batch_dim, true)); - network planar_network(engine, planar_topo); + network planar_network(engine, planar_topo, get_test_default_config(engine)); planar_network.set_input_data("input0", input0); planar_network.set_input_data("input1", input1); auto planar_output = planar_network.execute().at("gather").get_memory(); @@ -358,7 +358,7 @@ TEST(gather8_gpu_fp16, d323_axisY_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -465,7 +465,7 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2, 2, 2}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -576,7 +576,7 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -680,7 +680,7 @@ TEST(gather7_gpu_fp16, d44_axisY_bdim1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{4, 3, 4, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); 
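Dynamic-shape tests are the common case for layered properties: they keep the shared defaults but must additionally opt in to the new shape-inference path. The fragment below matches the dynamic gather_elements and gather tests in these hunks:

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));  // required for dynamic layouts
    network network(engine, topology, config);
    network.set_input_data("input1", input1);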
network.set_input_data("InputText", input2); @@ -755,7 +755,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 1, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -818,7 +818,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 1, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -880,7 +880,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim0) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 3, 2, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -948,7 +948,7 @@ TEST(gather_gpu_fp16, d14_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1010,7 +1010,7 @@ TEST(gather_gpu_fp16, d222_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1071,7 +1071,7 @@ TEST(gather_gpu_fp16, d22_axisY) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1132,7 +1132,7 @@ TEST(gather_gpu_fp16, d22_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1190,7 +1190,7 @@ TEST(gather_gpu_fp32, d14_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1251,7 +1251,7 @@ TEST(gather_gpu_fp32, d222_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1312,7 +1312,7 @@ TEST(gather_gpu_fp32, d22_axisY) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network 
network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1373,7 +1373,7 @@ TEST(gather_gpu_fp32, d22_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1434,7 +1434,7 @@ TEST(gather_gpu_int32, d22_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1492,7 +1492,7 @@ TEST(gather_gpu_int32, d14_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1553,7 +1553,7 @@ TEST(gather_gpu_int32, d222_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1614,7 +1614,7 @@ TEST(gather_gpu_int32, d22_axisY) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1678,7 +1678,7 @@ TEST(gather_gpu_fp32, d41_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{4, 1, 2, 3}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1741,7 +1741,7 @@ TEST(gather_gpu_fp32, d41_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 4, 1, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1800,7 +1800,7 @@ TEST(gather_gpu_fp32, d2_axisX) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 1, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1850,7 +1850,7 @@ TEST(gather_gpu_fp32, 322_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1889,7 +1889,7 @@ TEST(gather_gpu_fp32, dynamic_322_axisF) { 
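The other construction path in these tests is get_network(...), which takes the same config plus a stream pointer and an is_caching_test flag. Its definition is outside this patch; below is a hedged sketch of what it plausibly does, with the caching branch reduced to a comment (the GemmGPUTest hunk further below performs that serialization round-trip by hand via membuf and BinaryOutputBuffer):

    // Illustrative only; the real helper lives in test_utils.
    cldnn::network::ptr get_network_sketch(cldnn::engine& engine,
                                           cldnn::topology& topology,
                                           const ExecutionConfig& config,
                                           std::shared_ptr<cldnn::stream> stream,
                                           bool is_caching_test) {
        (void)stream;  // the real helper attaches this stream to the imported network
        if (!is_caching_test)
            return std::make_shared<cldnn::network>(engine, topology, config);
        // Caching path (assumption): serialize the built program to an in-memory
        // buffer and re-import it, so the test exercises model-cache export/import.
        // The exact BinaryOutputBuffer/BinaryInputBuffer plumbing is elided here.
        return std::make_shared<cldnn::network>(engine, topology, config);
    }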
topology.add(input_layout("input2", in2_layout)); topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, ov::Shape{})); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -1938,7 +1938,7 @@ void test_gather_gpu_u8_322_axisF(bool is_caching_test) { topology.add( gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputDictionary", input1); network->set_input_data("InputText", input2); diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp index 564ff44725b8b6..0dc3f499d2aca5 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp @@ -39,7 +39,7 @@ inline void DoTestBase(engine& engine, topology.add(input_layout("InputIndices", input1->get_layout())); topology.add(gather_nd_inst); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input0); network->set_input_data("InputIndices", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp index 7cbe4e9968dce8..8c7e9fa22e6280 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp @@ -213,7 +213,7 @@ struct gather_tree_test const primitive_id reorder_result_id = result_id + "_reordered"; topology.add(reorder(reorder_result_id, input_info(result_id), plain_layout, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(step_id, step_input); network->set_input_data(parent_id, parent_input); diff --git a/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp index ccfe63bda67aea..d2d31bd2b5dfd1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp @@ -129,7 +129,7 @@ class GemmGPUTest : public ::testing::TestWithParam { std::cout << "cached" << std::endl; membuf mem_buf; { - cldnn::network _network(engine, tp); + cldnn::network _network(engine, tp, get_test_default_config(engine)); process_program(_network.get_program()); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); @@ -141,7 +141,7 @@ class GemmGPUTest : public ::testing::TestWithParam { network = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, tp); + network = std::make_shared(engine, tp, 
get_test_default_config(engine)); process_program(network->get_program()); } @@ -292,7 +292,7 @@ void test_basic_bfyx_t2_inplace_crop_with_pad(bool is_caching_test) { gemm("output", { input_info("crop.1"), input_info("input2") }, data_types::f32, false, true) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -343,7 +343,7 @@ TEST(gemm_gpu, dynamic) { gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32, false, true, 1.0f, 0.0f, 4, 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -412,7 +412,7 @@ TEST(gemm_gpu, dynamic_multi_inference_same_shape) { gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32, false, false, 1.0f, 0.0f, 4, 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -501,7 +501,7 @@ TEST(gemm_gpu, dynamic_multi_inference_different_shape) { gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32, false, false, 1.0f, 0.0f, 4, 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1311,11 +1311,11 @@ class GemmBaseTest : public ::testing::TestWithParam { #ifdef ENABLE_ONEDNN_FOR_GPU ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "", impl_types::onednn }; - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); #else ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, p.kernel_name }; - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); #endif + + ExecutionConfig cfg = get_test_default_config(engine); cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_bfyx", gemm_impl} })); cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp index 1ad55e645d5a50..5435c6b96ddad4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/execution_config.hpp" #include "test_utils.h" #include @@ -289,7 +290,7 @@ struct generate_proposals_test const auto rois_num_type = type_to_data_type::value; auto& engine = get_test_engine(); - + std::shared_ptr stream = get_test_stream_ptr(); const primitive_id input_im_info_id = "InputImInfo"; const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(num_batches), feature(3)}}); set_values(input_im_info, getValues(im_info)); @@ -355,7 +356,7 @@ struct generate_proposals_test const primitive_id reorder_result_id
= generate_proposals_id + "Reordered"; topology.add(reorder(reorder_result_id, input_info(generate_proposals_id), format::bfyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), stream, is_caching_test); network->set_input_data(input_im_info_id, input_im_info); network->set_input_data(input_anchors_id, input_anchors); @@ -366,7 +367,7 @@ struct generate_proposals_test const auto rois = outputs.at(reorder_result_id).get_memory(); - const cldnn::mem_lock rois_ptr(rois, get_test_stream()); + const cldnn::mem_lock rois_ptr(rois, *stream); ASSERT_EQ(rois_ptr.size(), num_batches * param.post_nms_count * 4); const auto get_plane_data = [&](const memory::ptr& mem, const data_types data_type, const layout& from_layout) { @@ -376,7 +377,7 @@ struct generate_proposals_test cldnn::topology reorder_topology; reorder_topology.add(input_layout("data", from_layout)); reorder_topology.add(reorder("plane_data", input_info("data"), format::bfyx, data_type)); - cldnn::network reorder_net{engine, reorder_topology}; + cldnn::network reorder_net{engine, reorder_topology, get_test_default_config(engine)}; reorder_net.set_input_data("data", mem); const auto second_output_result = reorder_net.execute(); const auto plane_data_mem = second_output_result.at("plane_data").get_memory(); @@ -384,11 +385,11 @@ struct generate_proposals_test }; const cldnn::mem_lock roi_scores_ptr( - get_plane_data(output_roi_scores, data_type, rois_scores_layout), get_test_stream()); + get_plane_data(output_roi_scores, data_type, rois_scores_layout), *stream); ASSERT_EQ(roi_scores_ptr.size(), num_batches * param.post_nms_count); const cldnn::mem_lock rois_num_ptr( - get_plane_data(output_rois_num, rois_num_type, rois_num_layout), get_test_stream()); + get_plane_data(output_rois_num, rois_num_type, rois_num_layout), *stream); ASSERT_EQ(rois_num_ptr.size(), num_batches); const auto& expected_rois = param.expected_rois; diff --git a/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp index 1473961df8f593..122299d498dfd0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp @@ -75,7 +75,7 @@ struct grid_sample_gpu_test : public testing::TestWithParamset_input_data("data", data); network->set_input_data("grid", grid); const auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp index 154ca0f08eb019..04f5d34b531638 100644 --- a/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp @@ -35,7 +35,7 @@ TEST(check_hash_value, eltwise_basic) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise(key_prim_id, { input_info("input"), input_info("input2") }, eltwise_mode::sum)); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -65,7 +65,7 @@ TEST(check_hash_value, fc_basic) { fully_connected(key_prim_id, input_info("input"), "weights", "bias") ); - auto prog = 
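The generate_proposals hunks above make a second, subtler change: the test now fetches the stream pointer once, hands it to get_network, and locks result buffers against that same stream instead of calling get_test_stream() each time, so reads synchronize with the queue the network actually ran on. In outline (the shared_ptr and mem_lock template arguments here are assumptions; float matches this test's f32 data):

    std::shared_ptr<cldnn::stream> stream = get_test_stream_ptr();
    cldnn::network::ptr network = get_network(engine, topology,
                                              get_test_default_config(engine),
                                              stream, is_caching_test);
    // ... execute and fetch the "rois" output memory ...
    cldnn::mem_lock<float> rois_ptr(rois, *stream);  // lock on the stream the network used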
program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -96,7 +96,7 @@ TEST(check_hash_value, gather_basic) { gather(key_prim_id, input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) ); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -122,7 +122,7 @@ TEST(check_hash_value, gemm_basic) { topology.add(crop("crop.1", input_info("input"), { 1, 1, 4, 3 }, { 0, 1, 0, 0 })); topology.add(gemm(key_prim_id, { input_info("crop.1"), input_info("input2") }, data_types::f32, false, true)); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -145,7 +145,7 @@ TEST(check_hash_value, permute_basic) { input_layout("input", input->get_layout()), permute(key_prim_id, input_info("input"), { 0, 1, 2, 3 })); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -174,7 +174,7 @@ TEST(check_hash_value, reorder_basic) { input_layout("input", input->get_layout()), reorder(key_prim_id, input_info("input"), output_layout)); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -200,7 +200,7 @@ TEST(check_hash_value, reshape_basic) { topology.add(reorder("reorder", input_info("input"), padded_input_layout)); topology.add(reshape(key_prim_id, input_info("reorder"), tensor( 1, 1, 4, 1 ), cldnn::reshape::reshape_mode::base, padding({0, 0, 2, 2}))); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -227,7 +227,7 @@ TEST(check_hash_value, conv_basic) { data("biases", biases), convolution(key_prim_id, input_info("input"), { "weights" }, { "biases" }, {1, 1, 1}, {0, 0, 0}, {1, 1, 1})); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -260,7 +260,7 @@ TEST(check_hash_value, quantize_basic) { quantize(key_prim_id, input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 256, data_types::u8) ); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, 
topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); diff --git a/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp index a86c94b7119b61..94fb17104275e0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp @@ -73,7 +73,7 @@ void test_loop_gpu_basic_no_concat(bool is_caching_test) input_primitive_maps, output_primitive_maps, back_edges, 8) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_mem); network->set_input_data("trip_count", trip_count_mem); @@ -174,7 +174,7 @@ void test_loop_gpu_basic_concat(bool is_caching_test) input_primitive_maps, output_primitive_maps, back_edges, trip_count) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_mem); network->set_input_data("trip_count", trip_count_mem); network->set_input_data("initial_condition", initial_condition_mem); @@ -314,7 +314,7 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test) ///////////////////////////////// // network execution ///////////////////////////////// - cldnn::network::ptr network = get_network(engine, main_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, main_topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_mem); network->set_input_data("trip_count", trip_count_mem); network->set_input_data("initial_condition", initial_condition_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp index e35384a068fb03..fcbe4eaa7d3e76 100644 --- a/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp @@ -37,7 +37,7 @@ void test_fp32_basic(bool is_caching_test) { float beta = 1.f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -89,7 +89,7 @@ void test_fp32_basic2(bool is_caching_test) { float beta = 1.f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -141,7 +141,7 @@ void test_fp16_basic1(bool is_caching_test) { float beta = 1.f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, 
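The check_hash_value tests follow a slightly different skeleton: they build the program explicitly with the shared config, wrap it in a network without executing it, and read back the primitive descriptor whose hash is under test. The common core, taken directly from the hunks above:

    auto prog = program::build_program(engine, topology, get_test_default_config(engine));
    network net(prog, 0);  // the literal 0 matches the tests above
    const auto prim_inst = net.get_primitive(key_prim_id);
    const auto primitive = prim_inst->desc();  // descriptor whose hash the test asserts on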
beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -193,7 +193,7 @@ void test_fp32_basic3(bool is_caching_test) { float beta = 0.75f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp index b9535c6a1a251e..5b7495ee5f1c51 100644 --- a/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp @@ -245,7 +245,7 @@ struct lstm_dynamic_input_layer_test : public ::testing::Test "weights", bias_id)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -407,7 +407,7 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test initial_hidden_id, initial_cell_id)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -888,7 +888,7 @@ TEST(lstm_dynamic_negative, wrong_weights_size) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_recurrent_size_0) { @@ -913,7 +913,7 @@ TEST(lstm_dynamic_negative, wrong_recurrent_size_0) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_recurrent_size_1) { @@ -938,7 +938,7 @@ TEST(lstm_dynamic_negative, wrong_recurrent_size_1) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_dynamic_length_size_0) { @@ -963,7 +963,7 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_0) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) { @@ -988,5 +988,5 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } diff --git a/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp index 50c9555a59413d..2d62d73290d746 100644 --- 
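For the negative lstm_dynamic tests the expected failure happens during network construction itself, so the config argument has to move inside the assertion macro; nothing else about those tests changes:

    ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine)));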
a/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp @@ -244,7 +244,7 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, topology.add(lstm_gemm("lstm_gemm", input_info("input"), "weights", "recurrent", hasBias ? "biases" : "", hasHidden ? "hidden" : "")); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); if (hasHidden) { network->set_input_data("hidden", hidden); @@ -307,7 +307,7 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_ } topology.add(lstm_elt("lstm_elt", input_info("tempGEMM"), hasCell ? "cell" : "", clip_threshold, input_forget)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("tempGEMM", tempGEMM); if (hasCell) { network->set_input_data("cell", cell); @@ -430,7 +430,7 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz generate_lstm_topology(topology, input, hidden, cell, weights, recurrent, biases, sequence_len, hasBias, hasInitialHidden, hasInitialCell); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); if (hasInitialHidden) network->set_input_data("hidden", hidden); if (hasInitialCell) network->set_input_data("cell", cell); @@ -596,7 +596,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc prev_lstm_id = lstm_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); for (int i = 0; i < layers; ++i) { std::string sid = get_string_id(i); @@ -722,7 +722,7 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0})); } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("hidden", hidden); network->set_input_data("cell", cell); @@ -886,7 +886,7 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions, bool is_c topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0})); } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); std::map outputs; @@ -1053,7 
+1053,7 @@ void lstm_gpu_users_test(bool is_caching_test = false) { std::vector output_ids_offsets { input_info("lstm"), input_info("hidden") }; topology.add(concatenation("concatenation", output_ids_offsets, 1)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); std::map outputs; @@ -1212,7 +1212,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio prev_node_id = output_crop_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); for (int i = 0; i < layers; ++i) { std::string sid = get_string_id(i); @@ -1555,7 +1555,7 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, } // Creating network out of the above designed topology - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); for (size_t layer = 0; layer < layers; layer++) { std::string sid = get_string_id(layer); diff --git a/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp index 7fe4a65e0de3d0..0adeae76c21f6d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp @@ -107,7 +107,7 @@ struct matrix_nms_gpu_test : public testing::TestWithParamset_input_data("boxes", boxes); network->set_input_data("scores", scores); diff --git a/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp b/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp index 511caddca2e83a..0d8c2df6c67a0a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp @@ -77,7 +77,7 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -109,7 +109,7 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { topology.add(activation("relu4", input_info("relu3"), activation_func::relu)); topology.add(activation("relu5", input_info("relu4"), activation_func::relu)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -144,7 +144,7 @@ TEST(memory_pool, multi_outputs_network) { topology.add(activation("relu6", input_info("relu5"), activation_func::relu)); topology.add(activation("relu7", input_info("relu6"), activation_func::relu)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -182,7 +182,7 @@ TEST(memory_pool, oooq) { 
topology.add(concatenation("concat2", { input_info("relu4"), input_info("relu5") }, 1)); topology.add(activation("relu6", input_info("concat2"), activation_func::relu)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -227,7 +227,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { topology.add(concatenation("concat2", { input_info("relu4"), input_info("relu5") }, 1)); topology.add(activation("relu6", input_info("concat2"), activation_func::linear, { 1.0f, 0.5f })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_first(*engine, topology, config); @@ -302,7 +302,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { convolution("conv", input_info("input"), { "weights" }, { 1, 1, 1, 2 }), softmax("softmax", input_info("conv"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_first(*engine, topology, config); @@ -388,7 +388,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { convolution("conv", input_info("input"), { "weights" }, { 2, 1 }), softmax("softmax", input_info("conv"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_first(*engine, topo, config); @@ -421,7 +421,7 @@ TEST(memory_pool, shared_dep_two_output) { topo.add(cldnn::concatenation("result_1_0", { input_info("constant_0_0") }, 0)); topo.add(cldnn::concatenation("result_2_0", { input_info("constant_0_0") }, 0)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topo, config); @@ -462,7 +462,9 @@ TEST(memory_pool, non_opt_intermidate_opt_after) { data_memory ); - ExecutionConfig config(ov::intel_gpu::optimize_data(false)); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(false)); + network network(engine, topology, config); network.set_input_data("input1", input_memory1); network.set_input_data("input2", input_memory2); @@ -510,7 +512,7 @@ TEST(memory_pool, add_mem_dep_test) { actv3, actv4 ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input1", input_memory1); diff --git a/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp index 9ac247cf60dff0..d5fe3121996c65 100644 --- a/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp @@ -170,7 +170,7 @@ struct multiclass_nms_test : public ::testing::TestWithParam(task_config); auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto input1_dyn_layout = layout{ ov::PartialShape::dynamic(3), data_types::f16,format::bfyx }; diff --git a/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp 
index 4e8abcd2def262..491be53d4b4e49 100644
--- a/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp
@@ -122,7 +122,7 @@ void test_mvn_test_across_channels_outside_sqrt_bfyx(bool is_caching_test) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
 
-    cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
     network->set_input_data("input", input);
 
@@ -156,7 +156,7 @@ void test_mvn_test_across_channels_inside_sqrt_bfyx(bool is_caching_test) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
 
-    cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
     network->set_input_data("input", input);
 
@@ -195,7 +195,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -222,7 +222,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_normalize_variance)
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -249,7 +249,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -276,7 +276,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_normalize_variance_
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -305,7 +305,7 @@ TEST(mvn_gpu_test, dynamic_across_channels_inside_sqrt_bfyx_normalize_variance_f
     topology.add(input_layout("input", in_layout));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, true));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
     network.set_input_data("input", input);
@@ -338,7 +338,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -365,7 +365,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt__bfyx) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -392,7 +392,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_fp16) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -419,7 +419,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_fp16) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -446,7 +446,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -473,7 +473,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_normalize_variance)
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -500,7 +500,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -527,7 +527,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_normalize_variance_
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -556,7 +556,7 @@ TEST(mvn_gpu_test, dynamic_within_channels_inside_sqrt_bfyx_normalize_variance_f
     topology.add(input_layout("input", in_layout));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, false));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
     network.set_input_data("input", input);
@@ -663,7 +663,7 @@ struct mvn_random_test : ::testing::TestWithParam {
         prim.output_paddings = {output_pad};
         topo.add(prim);
 
-        cldnn::network::ptr net = get_network(eng, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+        cldnn::network::ptr net = get_network(eng, topo, get_test_default_config(eng), get_test_stream_ptr(), is_caching_test);
 
         net->set_input_data("input", input);
@@ -852,7 +852,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam {
         auto prim = mvn("mvn", input_info("input"), params.normalize_variance, 1e-10f, false, params.across_channels);
         prim.output_paddings = {output_pad};
         topo.add(prim);
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn"}));
         config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn", {format::type::bfyx, "mvn_gpu_bfyx_opt"}} }));
 
@@ -869,7 +869,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam {
         auto prim_opt = mvn("mvn_opt", input_info("input_to_target_layout"), params.normalize_variance, 1e-10f, false, params.across_channels);
         prim_opt.output_paddings = {output_pad};
         topo_opt.add(prim_opt);
-        ExecutionConfig config_opt;
+        ExecutionConfig config_opt = get_test_default_config(engine);
         config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn_opt", "input_to_target_layout"}));
         config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn_opt", {params.input_format, "mvn_gpu_b_fs_yx_fsv16_imad"}} }));
diff --git a/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp b/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp
index ffc9ace5e39049..245db179d50feb 100644
--- a/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp
@@ -125,7 +125,7 @@ struct non_max_suppression_basic : public testing::Test {
         topo.add(non_max_suppression("nms", input_info("reformat_boxes"), input_info("reformat_scores"), 6, false, true));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -186,7 +186,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "num_per_class"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -257,7 +257,7 @@ struct non_max_suppression_basic : public testing::Test {
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
         topo.add(reorder("plane_scores", input_info("selected_scores"), format::bfyx, this->data_type));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -317,7 +317,7 @@ struct non_max_suppression_basic : public testing::Test {
         second_output_topology.add(input_layout("num_outputs", this->valid_outputs_layout));
         second_output_topology.add(reorder("plane_scores", input_info("selected_scores"), format::bfyx, this->data_type));
         second_output_topology.add(reorder("plane_num", input_info("num_outputs"), format::bfyx, cldnn::data_types::i32));
-        network second_output_net{engine, second_output_topology};
+        network second_output_net{engine, second_output_topology, get_test_default_config(engine)};
         second_output_net.set_input_data("selected_scores", selected_scores_mem);
         second_output_net.set_input_data("num_outputs", valid_outputs_mem);
         auto second_output_result = second_output_net.execute();
@@ -375,7 +375,7 @@ struct non_max_suppression_basic : public testing::Test {
         topo.add(reorder("plane_scores", input_info("nms", 1), format::bfyx, this->data_type));
         topo.add(reorder("plane_outputs", input_info("nms", 2), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
         config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
 
@@ -437,7 +437,7 @@ struct non_max_suppression_basic : public testing::Test {
         second_output_topology.add(input_layout("num_outputs", valid_outputs_mem->get_layout()));
         second_output_topology.add(reorder("plane_scores", input_info("selected_scores"), format::bfyx, this->data_type));
         second_output_topology.add(reorder("plane_num", input_info("num_outputs"), format::bfyx, cldnn::data_types::i32));
-        network second_output_net{engine, second_output_topology};
+        network second_output_net{engine, second_output_topology, get_test_default_config(engine)};
         second_output_net.set_input_data("selected_scores", selected_scores_mem);
         second_output_net.set_input_data("num_outputs", valid_outputs_mem);
         auto second_output_result = second_output_net.execute();
@@ -485,7 +485,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "iou_threshold"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -542,7 +542,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "score_threshold"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -603,7 +603,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "soft_nms_sigma"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
diff --git a/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp
index e1ef46d7f9a9ea..1c9122ca4171c3 100644
--- a/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp
@@ -60,7 +60,7 @@ void test_count_non_zero(layout in_layout, std::vector in_data) {
     topology.add(count_nonzero("count_nonzero", input_info("InputData"))
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("InputData", input_mem);
     auto outputs = network.execute();
     auto output = outputs.at("count_nonzero").get_memory();
@@ -132,7 +132,7 @@ TEST(test_count_non_zero, dynamic_2d_f32_bfyx) {
     topology.add(input_layout("InputData", in_dyn_layout));
     topology.add(count_nonzero("count_nonzero", input_info("InputData")));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
 
     std::vector input_shapes = {171, 531, 168, 169, 174, 172, 168, 167, 1169, 16, 677};
@@ -180,7 +180,7 @@ void test_gather_non_zero(layout in_layout, std::vector in_data) {
         gather_nonzero("gather_nonzero", input_info("InputData"), input_info("OutputShape"))
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("InputData", input_mem);
 
     auto outputs = network.execute();
@@ -290,7 +290,7 @@ TEST(non_zero_gpu, dynamic) {
     topology.add(count_nonzero("count_nonzero", input_info("InputData")));
     topology.add(gather_nonzero("gather_nonzero", input_info("InputData"), input_info("count_nonzero")));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(true));
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
@@ -335,7 +335,7 @@ void test_non_zero(layout in_layout, std::vector in_data) {
     topology.add(count_nonzero("count_nonzero", input_info("InputData")));
     topology.add(gather_nonzero("gather_nonzero", input_info("InputData"), input_info("count_nonzero")));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("InputData", input_mem);
 
     auto outputs = network.execute();
diff --git a/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp
index 713ab875b66e8e..b9b6f620a43842 100644
--- a/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp
@@ -83,7 +83,7 @@ struct normalize_basic : public testing::Test {
         topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
         topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
 
-        cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+        cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
         network->set_input_data("Input0", input);
 
diff --git a/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp
index eebb0af3aeae81..9d1c4698802313 100644
--- a/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp
@@ -84,7 +84,7 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
     topology.add(input_layout("input", input->get_layout()));
     topology.add(one_hot("output", input_info("input"), shape, one_hot_axis, one_hot_limit));
 
-    cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
     network->set_input_data("input", input);
     auto outputs = network->execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -183,7 +183,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -242,7 +242,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -301,7 +301,7 @@ TEST(one_hot_gpu_i32_to_f32, bfyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -354,7 +354,7 @@ TEST(one_hot_gpu_i64_to_f32, bfyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -405,7 +405,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax0) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -460,7 +460,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax0) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -515,7 +515,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax1) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -570,7 +570,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax1) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -625,7 +625,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax2) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -680,7 +680,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax2) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -735,7 +735,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax3) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -790,7 +790,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax3) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
diff --git a/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp
index 54439b2816ea1a..0b7b285b467a63 100644
--- a/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp
@@ -58,7 +58,7 @@ TEST(permute_gpu_f32, output_ordering_test)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), perm));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     auto output = outputs.at("permute");
@@ -113,7 +113,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 1, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -172,7 +172,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 1, 3, 2 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -219,7 +219,7 @@ TEST(permute_gpu_f32, basic_yxfb_permute_1_0_2_3)
         input_layout("input", input_mem->get_layout()),
         permute("permute", input_info("input"), { 1, 0, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
    network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -281,7 +281,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding)
         reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })),
         permute("permute", input_info("reorder"), { 0, 1, 3, 2 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -338,7 +338,7 @@ TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 1, 0, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -393,7 +393,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 1, 0, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -453,7 +453,7 @@ void permute_test_with_reorder()
         permute("permute", input_info("reorder"), { 0, 1, 3, 2 }),
         reorder("reorder_out", input_info("permute"), { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -552,7 +552,7 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1)
         reorder("reorder2", input_info("permute"), format::bfyx, data_types::f32),
         permute("out", input_info("reorder2"), { 0, 3, 1, 2}));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(false));
     config.set_property(ov::intel_gpu::allow_static_input_reorder(true));
 
@@ -567,7 +567,7 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1)
         reorder("reorder2", input_info("permute"), format::bfyx, data_types::f32), // to be fused to previous permute
         permute("out", input_info("reorder2"), { 0, 3, 1, 2})); // return to original value
 
-    ExecutionConfig config_fused;
+    ExecutionConfig config_fused = get_test_default_config(engine);
     config_fused.set_property(ov::intel_gpu::optimize_data(true));
     network fused(engine, topology_fused, config_fused);
     fused.set_input_data("input", input);
@@ -602,7 +602,7 @@ TEST(fc_permute_crop_gpu, basic_permute_yxfb)
         permute("permute", input_info("input"), { 1, 0, 2, 3 }) // yxfb {5, 1, 1, 512} --- without permute fix yxfb {1, 5, 512, 1}
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -637,7 +637,7 @@ TEST(fc_permute_crop_gpu, basic_0)
         crop("crop", input_info("permute"), { 1, 1, 1, 512 }, { 4, 0, 0 ,0 }) // without permute fix it will fail "Tensor pitches didn't set correctly"
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -667,7 +667,7 @@ TEST(fc_permute_gpu, basic_permute_bfyx)
         permute("permute", input_info("input"), { 1, 0, 2, 3 })
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -727,7 +727,7 @@ TEST(permute_gpu_f32, permute_bfwzyx)
         permute("permute", input_info("input"), permute_order)
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -819,7 +819,7 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape)
         reorder("output_4d", input_info("reshape_6_to_4"), { data_types::f32, format::bfyx, cldnn::tensor(batch(b), feature(f), spatial(x, y)) })
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -870,7 +870,7 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_4_1_2_3)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 4, 1, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -937,7 +937,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_2_3_1) {
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 2, 3, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -993,7 +993,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_2_3_1) {
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 2, 3, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
network.set_input_data("input", input); auto outputs = network.execute(); @@ -1049,7 +1049,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_2_3_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1099,7 +1099,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_2_3_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1149,7 +1149,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1211,7 +1211,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1265,7 +1265,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1319,7 +1319,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1373,7 +1373,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_2_3_4_5_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1445,7 +1445,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_2_3_4_5_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1505,7 +1505,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_2_3_4_5_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1565,7 +1565,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_2_3_4_5_1) { input_layout("input", 
         permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -1686,7 +1686,7 @@ void TiledPermuteTest::run_test(const std::vector& si
     );
 
     // run with permute_ref
-    ov::intel_gpu::ExecutionConfig config_ref;
+    ov::intel_gpu::ExecutionConfig config_ref = get_test_default_config(engine);
     ov::intel_gpu::ImplementationDesc permute_ref = { format_fsv, "permute_ref" };
     config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_ref} }));
 
@@ -1697,7 +1697,7 @@ void TiledPermuteTest::run_test(const std::vector& si
     cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream());
 
     // run with optimized kernel, e.g. permute_tile_8x8_4x4_fsv16
-    ExecutionConfig config_tile;
+    ExecutionConfig config_tile = get_test_default_config(engine);
     ov::intel_gpu::ImplementationDesc permute_tile_opt = { format_fsv, permute_opt };
     config_tile.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_tile_opt} }));
 
@@ -1872,7 +1872,7 @@ TEST(permute_gpu_f32_dynamic, bfyx_0_2_3_1) {
         input_layout("input", input_layout_dynamic),
         permute("permute", input_info("input"), { 0, 2, 3, 1 }));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
     network.set_input_data("input", input);
diff --git a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp
index 42312c5bfd91ca..e1a348624dd912 100644
--- a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp
@@ -210,7 +210,7 @@ TEST(pooling_forward_gpu, basic_max_byxf_f32_wsiz3x3_wstr1x1_i1x3x3x8_nopad) {
     topology topology;
     topology.add(input_layout("input_prim", input_prim->get_layout()));
     topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 3, 3 }, { 1, 1 }));
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, { 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f,
         1.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
         2.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
@@ -256,7 +256,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
     topology.add(input_layout("input_prim", input_prim->get_layout()));
     topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 3, 3 }, { 1, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
     network.set_input_data("input_prim", input_prim);
@@ -297,12 +297,9 @@ TEST(pooling_forward_gpu, basic_max_pooling_int8) {
         reorder("reorder2", input_info("pool1"), out_layout)
     );
 
-    network network(
-        engine,
-        topology,
-        ExecutionConfig{
-            ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" })
-        });
+    ExecutionConfig cfg = get_test_default_config(engine);
+    cfg.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" }));
+    network network(engine, topology, cfg);
 
     network.set_input_data("input", input_memory);
 
@@ -349,12 +346,9 @@ TEST(pooling_forward_gpu, basic_avg_pooling_int8) {
reorder("reorder2", input_info("pool1"), out_layout) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder2" }) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder2" })); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -390,7 +384,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 2, 2 }, { 1, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -434,7 +428,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr2x2_i4x4x1x1_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 2, 2 }, { 2, 2 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.25f, 1.00f, 0.50f, 0.25f, 2.00f, 1.50f, -0.50f, -0.75f, 0.00f, -1.00f, 0.50f, 0.25f, 0.50f, -2.00f, -1.50f, -2.50f }); network.set_input_data("input_prim", input_prim); @@ -488,7 +482,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x2x2_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 2, 2 }, { 1, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.5f, 0.5f, -1.5f, 0.0f, 0.5f, 0.0f, -0.5f, 0.5f, 0.0f, -0.5f, 0.0f, -0.5f, 1.0f, -2.0f, 0.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -2.0f, 1.0f, 1.5f, 0.0f, -1.0f, -0.5f, -2.0f, 0.5f, -0.5f, -1.0f, 1.0f, -0.5f, -0.5f, 1.5f, -0.5f, 0.0f }); network.set_input_data("input_prim", input_prim); @@ -538,7 +532,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -0.50f, -1.00f, 0.50f }); network.set_input_data("input_prim", input_prim); @@ -583,7 +577,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -1.00f, -0.50f, @@ -632,7 +626,7 @@ TEST(pooling_forward_gpu, basic_avg_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 4.0f, -1.0f, 3.5f }); 
network.set_input_data("input_prim", input_prim); @@ -677,7 +671,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -722,7 +716,7 @@ TEST(pooling_forward_gpu, offsets_avg_bfyx_f32_wsiz3x3_wstr3x3_i1x1x3x3_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, { 3, 3 }, { 3, 3 }, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); std::vector input_vec = { 1.5f, -0.5f, -1.0f, 0.5f, 0.1f, 0.2f, 0.9f, 1.1f, 2.2f }; set_values(input_prim, input_vec); @@ -770,7 +764,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, 2.5f, -1.0f, 0.5f, 3.0f, 0.5f, 0.0f, -8.0f }); network.set_input_data("input_prim", input_prim); @@ -825,7 +819,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 2, 2}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -886,7 +880,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -1.00f, -0.50f, @@ -957,7 +951,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ {0,0,1,2}, 0 }))); topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 2, 2}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -1020,7 +1014,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 }))); 
topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -1.00f, -0.50f, @@ -1091,7 +1085,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, { 0, 0 }, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{ { 0, 0, 2, 2 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.f, 2.f, 3.f, 4.f, 5.f, 1.5f, -0.5f, 6.f, @@ -1159,7 +1153,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.f, 2.f, 3.f, 4.f, 5.f, @@ -1426,7 +1420,7 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) pool); // Network processing - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); //network_exe(network, vGoldOutput, "pool_GOLD"); auto outputs = network.execute(); @@ -1475,7 +1469,7 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) format::bfyx, { in_B, in_F, in_X, in_Y }))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); //network_exe(network, vTestOutput, "reorder_UnSwizzelled"); auto outputs = network.execute(); @@ -1529,7 +1523,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_1x1_stride_2x2_ou topology.add(pooling("avg_pooling", input_info("reorder_input"), pooling_mode::average, { 2, 2 }, { 1, 1 })); topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f16, format::bfyx, { 1, 1, 2, 2 }))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f) }); network.set_input_data("input", input_prim); @@ -1581,7 +1575,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_2x2_stride) topology.add(pooling("avg_pooling", input_info("reorder_input"), pooling_mode::average, { 2, 2 }, { 2, 2 })); topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f16, format::bfyx, { 1, 1, 3, 3 }))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f) }); network.set_input_data("input", input_prim); @@ -1647,7 +1641,7 @@ 
@@ -1647,7 +1641,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_2x2x3x3_input_2x2_pool_2x2_stride)
     topology.add(pooling("avg_pooling", input_info("reorder_input"), pooling_mode::average, { 2, 2 }, { 2, 2 }));
     topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f16, format::bfyx, { batch_count, features_count, out_y, out_x })));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, {
         FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f), //B0F0
         FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f), //B0F1
         FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f), //B1F0
@@ -1718,7 +1712,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x
     topology.add(pooling("pool_prim", input_info("reorder_input"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0}));
     topology.add(reorder("reorder_pooling", input_info("pool_prim"), layout(data_types::f16, format::bfyx, { 1,1,4,4 }, padding{ { 0, 0, 1, 1 }, 0 })));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, {
         FLOAT16(1.50f), FLOAT16(-1.00f), FLOAT16(-0.50f),
@@ -1791,7 +1785,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x
     topology.add(pooling("pool_prim", input_info("reorder_input"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0}));
     topology.add(reorder("reorder_pooling", input_info("pool_prim"), layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0, 0, 1, 1 }, 0 })));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, {
         FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f), FLOAT16(4.f), FLOAT16(5.f),
@@ -1867,7 +1861,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
     golden_topology.add(reorder("reorder_input", input_info("input"), input_prim->get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 })));
     golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
 
-    network golden_network(engine, golden_topology);
+    network golden_network(engine, golden_topology, get_test_default_config(engine));
     golden_network.set_input_data("input", input_prim);
 
     auto outputs = golden_network.execute();
@@ -1885,7 +1879,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
     golden_topology.add(pooling("fsv32_pooling", input_info("reorder_input"), pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
     golden_topology.add(reorder("reorder_pooling", input_info("fsv32_pooling"), layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0,0,x_out_pad,y_out_pad },0 })));
 
-    network fsv32_network(engine, golden_topology);
+    network fsv32_network(engine, golden_topology, get_test_default_config(engine));
     fsv32_network.set_input_data("input", input_prim);
 
     auto outputs = fsv32_network.execute();
@@ -1936,7 +1930,8 @@ class pooling_test_base {
     virtual void run_expect(const VVVVVF& expected, bool is_caching_test) {
         auto& eng = get_test_engine();
         auto topo = build_topology(eng);
-        ExecutionConfig config(ov::intel_gpu::optimize_data(true));
+        ExecutionConfig config = get_test_default_config(eng);
+        config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(eng, topo, config, get_test_stream_ptr(), is_caching_test);
 
@@ -2314,7 +2309,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size, stride_size},{y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2336,7 +2331,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride)
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2399,7 +2394,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x
                                     pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                             {stride_size, stride_size}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2420,7 +2415,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x
                                     {stride_size, stride_size}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2481,7 +2476,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::average, {pool_size, pool_size},
                                     {stride_size, stride_size}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
        auto outputs = golden_network.execute();
@@ -2504,7 +2499,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride)
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2566,7 +2561,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::average, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2587,7 +2582,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2649,7 +2644,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2672,7 +2667,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2734,7 +2729,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2757,7 +2752,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2823,7 +2818,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2846,7 +2841,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -3231,10 +3226,9 @@ TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) {
     );
 
     ov::intel_gpu::ImplementationDesc impl = {format::bfyx, std::string(""), impl_types::onednn};
-    ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order),
-                        ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" }),
-                        ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"pool1", impl}}),
-    };
+    ExecutionConfig cfg = get_test_default_config(engine);
+    cfg.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" }));
+    cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"pool1", impl}}));
 
     network network(
         engine,
diff --git a/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp
index 6fb279797239a4..3ce2af6391f06a 100644
--- a/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp
@@ -90,7 +90,7 @@ class PriorBoxGPUTest : public ::testing::TestWithParam
void test_copy_dependecies_from_nodes(bool is_caching_test) {
     auto& engine = get_test_engine();
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(true));
 
     auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
diff --git a/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp
index 484bd9caf64139..51f047aaa70f64 100644
--- a/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp
@@ -100,7 +100,7 @@ struct pyramid_roi_align_typed_test : testing::Test {
         { P2_scale, P3_scale, P4_scale, P5_scale },
         starting_level));
 
-    cldnn::network::ptr net = get_network(engine, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr net = get_network(engine, topo, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
     net->set_input_data("rois", rois_mem);
 
diff --git a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp
index fdea2522d96bb7..b3e5a2d3a1a85f 100644
--- a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp
a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp @@ -84,7 +84,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -148,7 +148,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -212,7 +212,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) reorder("reorder", input_info("quantize"), layout{data_types::f32, format::bfyx, tensor{1,8,2,2}}) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -292,7 +292,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -381,7 +381,7 @@ TEST(quantize_gpu, quantize_levels_3) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 3, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -472,7 +472,7 @@ TEST(quantize_gpu, quantize_levels_256_2d_unsigned) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 256, data_types::u8) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -564,7 +564,7 @@ TEST(quantize_gpu, quantize_levels_256_3d_unsigned) { reorder("out", input_info("quantize"), format::bfzyx, data_types::u8) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -658,7 +658,7 @@ TEST(quantize_gpu, dynamic) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -837,7 +837,7 @@ struct quantize_random_test : 
testing::TestWithParam{"quantize"})); cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test); @@ -877,7 +877,7 @@ struct quantize_random_test : testing::TestWithParamget_layout())); topology.add(input_layout("min_val", min_val->get_layout())); topology.add(input_layout("max_val", max_val->get_layout())); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp index 8a405850392e4c..96edafa0e6dc75 100644 --- a/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp @@ -44,9 +44,11 @@ struct RangeArgs { step.addTo(topology); topology.add(range { "range", { input_info(start.name), input_info(stop.name), input_info(step.name) }, { dt, format::bfyx, tensor{batch(outLen)} } }); - ExecutionConfig config(ov::intel_gpu::allow_new_shape_infer(use_new_shape_infer)); + auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(use_new_shape_infer)); - network network { tests::get_test_engine(), topology, config }; + network network { engine, topology, config }; start.setData(network); stop.setData(network); @@ -207,7 +209,7 @@ TEST(range_gpu_test, range_with_select) { set_values(input0, {start_val}); set_values(input2, {step_val}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network { tests::get_test_engine(), topology, config }; @@ -243,7 +245,7 @@ TEST(range_gpu_test, constant_folding) { topology.add(data("input2", input2)); topology.add(range{ "range", { input_info("input0"), input_info("input1"), input_info("input2") }, data_types::i32}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -281,7 +283,7 @@ TEST(range_gpu_test, dynamic_all) { topology.add(input_layout("input2", dynamic_input_layout)); topology.add(range{ "range", { input_info("input0"), input_info("input1"), input_info("input2") }, data_types::i32}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -327,7 +329,7 @@ TEST(range_gpu_test, dynamic_stop) { topology.add(data("input2", input2)); topology.add(range{ "range", { input_info("input0"), input_info("input1"), input_info("input2") }, data_types::i32}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp index a40f8885e35f76..bf7e6199bb73af 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp @@ -526,7 +526,7 @@ class ReduceTestBase : public ::testing::TestWithParamget_layout())); topology.add(red); - 
ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name}; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); @@ -780,7 +780,7 @@ void test_common_bfyx(bool is_caching_test) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 0)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -814,7 +814,7 @@ TEST(reduce_gpu, common_bfyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {3, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -844,7 +844,7 @@ TEST(reduce_gpu, regr_bfyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, { 0, 3 }, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -874,7 +874,7 @@ TEST(reduce_gpu, common_bfzyx) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -904,7 +904,7 @@ TEST(reduce_gpu, common_bfzyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -934,7 +934,7 @@ TEST(reduce_gpu, common_bfwzyx) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {2, 3, 4, 5}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -964,7 +964,7 @@ TEST(reduce_gpu, common_bfwzyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {1, 2, 3}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -995,7 +995,7 @@ TEST(reduce_gpu, common_bfwzyx_max_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::max, {0, 1}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1025,7 +1025,7 @@ TEST(reduce_gpu, common_bfwzyx_min) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::min, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input", input); @@ -1055,7 +1055,7 @@ TEST(reduce_gpu, common_bfwzyx_min_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::min, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1085,7 +1085,7 @@ TEST(reduce_gpu, common_bfwzyx_mean) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::mean, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1115,7 +1115,7 @@ TEST(reduce_gpu, common_bfwzyx_mean_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::mean, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1145,7 +1145,7 @@ TEST(reduce_gpu, common_bfwzyx_prod) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::prod, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1175,7 +1175,7 @@ TEST(reduce_gpu, common_bfwzyx_prod_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::prod, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1206,7 +1206,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0, 1}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1236,7 +1236,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_and) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_and, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1266,7 +1266,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_and_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_and, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1296,7 +1296,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_or) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_or, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1326,7 +1326,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_or_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_or, {1, 2}, 1)); - network network(engine, 
topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1356,7 +1356,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_square) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum_square, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1386,7 +1386,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_square_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum_square, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1416,7 +1416,7 @@ TEST(reduce_gpu, common_bfwzyx_l1) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l1, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1446,7 +1446,7 @@ TEST(reduce_gpu, common_bfwzyx_l1_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l1, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1476,7 +1476,7 @@ TEST(reduce_gpu, common_bfwzyx_l2) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l2, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1506,7 +1506,7 @@ TEST(reduce_gpu, common_bfwzyx_l2_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l2, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1536,7 +1536,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1566,7 +1566,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1596,7 +1596,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum_exp, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1626,7 +1626,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum_exp, {1, 2}, 
1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1658,7 +1658,7 @@ TEST(reduce_gpu, dynamic) { topology.add(input_layout("input", in_dyn_layout)); topology.add(reduce("reduce", input_info("input"), reduce_mode::prod, {1, 2}, 1)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1715,7 +1715,7 @@ TEST(reduce_gpu, b_fs_yx_fsv16_min_dynamic) { topology.add(reorder("reorder", input_info("input"), used_layout)); topology.add(reduce("reduce", input_info("reorder"), reduce_mode::min, {1}, 0)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1770,7 +1770,7 @@ TEST(reduce_gpu, b_fs_yx_fsv16_max_dynamic) { topology.add(reorder("reorder", input_info("input"), used_layout)); topology.add(reduce("reduce", input_info("reorder"), reduce_mode::max, {1}, 0)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1891,7 +1891,7 @@ class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParamget_layout())); topology.add(red); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name}; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); @@ -2045,7 +2045,7 @@ class ReduceOnednnTestBase : public ::testing::TestWithParamget_layout())); topology.add(red); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name, impl_types::onednn}; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); diff --git a/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp index 49144fffdba540..e1e0c7ff6b86a6 100644 --- a/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp @@ -180,7 +180,7 @@ void runRegionTest(region_yolo_test_params& params, bool is_caching_test = false params.regionNum, static_cast(params.mask.size()), params.softMax)); topology.add(reorder("reorder_post", input_info("region_yolo"), format::bfyx, params.dataType)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", inputPrim); diff --git a/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp b/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp index 254c076e54e25f..0cdb5526aa4376 100644 --- a/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp +++ 
b/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp @@ -63,7 +63,7 @@ void test_multiple_outputs(bool is_caching_test) { std::vector<float> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "shuffle_channels", "reshape", "strided_slice" })); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -131,7 +131,7 @@ void test_output_node_optimization(bool is_caching_test) { topology.add(convolution("conv", input_info("input"), { "weights" }, { 2, 1 })); topology.add(activation("relu", input_info("conv"), activation_func::relu)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); // check that the output node keeps the same name after the output node is deleted due to the ReLU optimization diff --git a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp index 5619d415928ef4..09438ac7c17145 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp @@ -44,6 +44,14 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in, int32_t w_in, bool is_caching_test) { auto& engine = get_test_engine(); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // oneDNN currently does not support out_of_order queues, so skip this test + return; + } + + auto stream = std::shared_ptr(engine.create_stream(cfg)); tensor ts; if (input_format.dimension() == 4) { @@ -60,7 +68,7 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, layout output_layout(output_data_type, output_format, ts); if (input_data_type == data_types::i8) { - mem_lock input_ptr{input, get_test_stream()}; + mem_lock input_ptr{input, *stream}; unsigned char i = 1; for (auto it = input_ptr.begin(); it != input_ptr.end(); ++it) { @@ -70,7 +78,7 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, } } } else { - mem_lock input_ptr{input, get_test_stream()}; + mem_lock input_ptr{input, *stream}; float i = 1.f; for (auto it = input_ptr.begin(); it != input_ptr.end(); ++it) { @@ -84,11 +92,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, reorder("reorder", input_info("input"), output_layout)); // run on the reference (reorder_data) kernel - ov::intel_gpu::ExecutionConfig config_ref; + ov::intel_gpu::ExecutionConfig config_ref = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc reorder_ref = { output_format, "reorder_data" }; config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_ref} })); - cldnn::network::ptr network_ref = get_network(engine, topology, config_ref, get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network_ref = get_network(engine, topology, config_ref, stream, is_caching_test); network_ref->set_input_data("input", input); @@ -97,11 +105,11 @@ static void
compare_bfyx2blocked_with_ref(const std::string& kernel_name, e1->wait(); // run on optimized kernel - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc reorder_optimized = { output_format, kernel_name }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_optimized} })); - cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, config, stream, is_caching_test); network->set_input_data("input", input); @@ -268,7 +276,7 @@ TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -355,7 +363,7 @@ TEST(reorder_gpu_f32, basic) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -447,7 +455,7 @@ TEST(reorder_gpu_f32, basic_subtract) { input_layout("subtract", subtract->get_layout()), reorder("reorder", input_info("input"), output_layout, "subtract")); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("subtract", subtract); @@ -529,7 +537,7 @@ TEST(reorder_gpu_f32, basic_subtract_value) { topology topology; topology.add(input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout, subtract_val)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -627,7 +635,7 @@ TEST(reorder_gpu_f16, basic_subtract_f32_output_f32) { topology.add(data("subtract", subtract)); topology.add(reorder("reorder", input_info("input"), output_layout, "subtract")); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -715,7 +723,7 @@ TEST(reorder_gpu_f16, basic_subtract_value) { topology.add(input_layout("input", input->get_layout())); topology.add(reorder("reorder", input_info("input"), output_layout, subtract_val)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -788,12 +796,9 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { topology.add(reorder("reorder_f16_f32", input_info("input"), interm_layout)); topology.add(reorder("reorder_f32_f16", input_info("reorder_f16_f32"), output_layout)); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{"reorder_f16_f32", "reorder_f32_f16"}) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{"reorder_f16_f32", "reorder_f32_f16"})); + network network(engine, topology, cfg); network.set_input_data("input", input); @@ -859,12 +864,9 
@@ TEST(reorder_gpu, basic_convert_int8) { reorder("reorder2", input_info("reorder_input"), in_layout) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"}) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"})); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -906,12 +908,9 @@ TEST(reorder_gpu, basic_convert_uint8) { reorder("reorder2", input_info("reorder_input"), in_layout) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2" }) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2" })); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -988,12 +987,9 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { ) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "crop" }) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "crop" })); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -1091,7 +1087,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfyx_input_padding) reorder("reorder", input_info("input"), input->get_layout().format, input->get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 1, 2 }, 0 }), reorder("reorder2", input_info("reorder"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1170,7 +1166,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_yxfb_input_padding) reorder("reorder", input_info("input"), input->get_layout().format, input->get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 2, 1 }, 0 }), reorder("reorder2", input_info("reorder"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1229,7 +1225,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_bfzyx) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), format::bfzyx, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1291,7 +1287,7 @@ TEST(reorder_gpu_f32, dynamic_bfyx_to_bfzyx) { input_layout("input", in_layout), reorder("reorder", input_info("input"), format::bfzyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1360,7 +1356,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfzyx) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), format::bfzyx, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto 
outputs = network.execute(); @@ -1436,7 +1432,7 @@ TEST(reorder_gpu_f32, basic_bfzyx_to_bfyx) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), format::bfyx, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1496,7 +1492,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant) reorder("r2", input_info("r1"), format::yxfb, data_types::f32) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); @@ -1525,7 +1521,7 @@ TEST(reorder_gpu_opt, remove_redundant_activation_fuse) eltwise("output", { input_info("relu"), input_info("scale_data") }, eltwise_mode::prod) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); @@ -1549,7 +1545,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant_output_due_to_implicit_reorders) reorder("r1", input_info("conv"), format::bfyx, data_types::f32) // optimize_data should add a conversion from yxfb to bfyx, and 'conv' should output data in bfyx as well (IE case) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); // we need to check that r1 is successfully optimized and that we can still query for r1's output, which should point to conv's output (note: conv cannot be marked as output in this case) config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "r1" })); @@ -1578,7 +1574,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders) softmax("output", input_info("r1")) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); @@ -1603,7 +1599,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) reorder("r1", input_info("in"), format::bfyx, data_types::f32) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -1641,7 +1637,7 @@ TEST(reorder_gpu_opt, mean_mul) }; float answers[] = { 0.5f, 5.0f, -15.0f, 17.2f, 6.0f, -21.0f }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); net.set_input_data("in", in); @@ -1676,7 +1672,7 @@ TEST(reorder_gpu_opt, mean_div) }; float answers[] = { 2.0f, 1.0f, -1.0f, 0.5f, 4.0f, -2.0f }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); net.set_input_data("in", in); @@ -1707,7 +1703,7 @@ TEST(reorder_gpu_opt, mean_mul_val) }; float answers[] = { 2.0f, 4.0f, 1.5f, 2.0f, 50.0f, 600.0f }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); net.set_input_data("in", in); @@ -1737,7 +1733,7 @@ TEST(reorder_gpu_opt, mean_mul_val_float_to_int) }; char answers[] = { 0, 2, 1, 2, 25, 127 }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config);
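Every hunk in this file follows the same recipe: a default-constructed ExecutionConfig becomes get_test_default_config(engine). The helper's definition is not part of this diff; purely as a hedged sketch, assuming it derives the queue type from the device under test (the supports_immad checks added elsewhere in this patch suggest oneDNN-capable devices need an in-order queue), it could look roughly like:

inline ExecutionConfig get_test_default_config(cldnn::engine& engine) {
    ExecutionConfig cfg;
    // Assumption: oneDNN-capable devices (supports_immad) want an
    // in-order queue by default, so the config depends on the engine.
    if (engine.get_device_info().supports_immad)
        cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    return cfg;
}

Starting every test from an engine-aware default rather than ExecutionConfig() is presumably what removes the per-test queue-type boilerplate visible in the deleted brace-initializer constructors.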
net.set_input_data("in", in); @@ -1769,7 +1765,7 @@ TEST(reorder_gpu_i32, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1810,7 +1806,7 @@ TEST(reorder_gpu_i64, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1836,7 +1832,7 @@ TEST(reorder_gpu_binary, binary_output) { auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); @@ -1855,7 +1851,7 @@ TEST(reorder_gpu_binary, binary_output) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1884,7 +1880,7 @@ TEST(reorder_gpu_binary, binary_input) { auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::bin, format::b_fs_yx_32fp,{ 2, 2, 2, 2 } }); @@ -1906,7 +1902,7 @@ TEST(reorder_gpu_binary, binary_input) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1976,7 +1972,7 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) reorder("reorder3", input_info("reshape3"), format::bfyx, data_types::f32, sub_bfyx), reorder("out_reorder", input_info("reorder3"), format::bfwzyx, data_types::f32) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2021,7 +2017,7 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2103,7 +2099,7 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout.with_padding(padding({ 0, 0, x_pad, y_pad, 0 }, 0.f)))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2178,7 +2174,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed) reorder(reorder_name, input_info("first_activation"), format::bfyx, data_types::f32), activation("second_activation", input_info(reorder_name), 
activation_func::abs)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2225,7 +2221,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_not_allowed) reorder(reorder_name, input_info("input"), format::bfyx, data_types::f32), convolution("convolution", input_info(reorder_name), {"weights"}, { 1, 1 }, { 1, 1 }, { 1, 1 })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2281,7 +2277,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_padded) reorder(reorder_name, input_info("input"), format::bfyx, data_types::f32), activation("activation", input_info(reorder_name), activation_func::abs)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2317,7 +2313,7 @@ TEST(reorder_gpu, any_format) { topo.add(input_layout("in", input->get_layout())); topo.add(reorder("out", input_info("in"), format::any, data_types::f32)); - network net(engine, topo); + network net(engine, topo, get_test_default_config(engine)); auto data = generate_random_1d(input->count(), -1, 1); set_values(input, data); @@ -2350,7 +2346,7 @@ TEST(reorder_image2d_rgba_to_bfyx_gpu, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2400,7 +2396,7 @@ TEST(reorder_bfyx_to_image2d_rgba_gpu, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2571,7 +2567,7 @@ class ReorderTest : public ::testing::TestWithParam { public: cldnn::engine& engine = get_test_engine(); cldnn::topology topology_test; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); static const int min_random = -200; static const int max_random = 200; std::vector executed_prims; @@ -2721,10 +2717,9 @@ TEST_P(testing_removal_reorder, removal_no_padded_reorder) { ); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; - ExecutionConfig config{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} })); setup_with_build_ops(config); @@ -2751,10 +2746,9 @@ TEST_P(testing_removal_reorder, removal_padded_reorder) { ); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; - ExecutionConfig config{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::optimize_data(true), - 
ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} })); setup_with_build_ops(config); @@ -2925,10 +2919,9 @@ TEST(reorder_onednn_gpu, basic_convert_int8) { ); ov::intel_gpu::ImplementationDesc impl = { format::bfyx, std::string(""), impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"}), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{ "reorder_input", impl }}), - }; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"})); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{ "reorder_input", impl }})); network network( engine, diff --git a/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp index 245892c57c274f..edaecf60a34508 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp @@ -320,7 +320,7 @@ struct reorg_yolo_test topology.add(reorg_yolo("reorg_yolo", input_info("input_reordered"), params.stride)); topology.add(reorder("reorg_yolo_reordered", input_info("reorg_yolo"), plain_format, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto result = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp index 6c25f8ea09fd9d..2a239fc285d099 100644 --- a/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp @@ -47,7 +47,7 @@ void test_basic_in2x3x2x2_nearest(bool is_caching_test) { 12.f, 9.f, -17.f, }); - cldnn::network::ptr net = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("input", input); @@ -117,7 +117,7 @@ TEST(resample_gpu, basic_in2x3x2x2_bilinear) { 3.f, 4.f, }); - cldnn::network net{ engine, topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -168,7 +168,7 @@ TEST(resample_gpu, nearest_asymmetric) { 3.f, 4.f, }); - cldnn::network net{ engine, topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -219,7 +219,7 @@ TEST(resample_gpu, nearest_asymmetric_i8) { 3, 4, }); - cldnn::network net{ engine, topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -270,7 +270,7 @@ TEST(resample_gpu, bilinear_asymmetric) { 3.f, 4.f, }); - cldnn::network net{ engine, 
topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -471,7 +471,8 @@ struct resample_random_test : testing::TestWithParam{"resample"})); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample", {params.in_format, "resample_ref"}} })); @@ -649,7 +650,7 @@ struct caffe_resample_random_test : testing::TestWithParam{"resample_opt"})); config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, "resample_opt"}} })); @@ -725,7 +726,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -815,7 +816,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -905,7 +906,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -995,7 +996,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1085,7 +1086,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1175,7 +1176,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1245,7 +1246,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1309,7 +1310,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1379,7 +1380,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); 
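These resample hunks, like the reorder ones above, retire the brace-initializer form of ExecutionConfig in favor of layering properties on the engine-aware default. An illustrative fragment contrasting the two styles, reusing only properties that appear in the surrounding hunks (not a copy of any single test):

// Old style: properties fixed at construction, engine ignored.
ExecutionConfig old_cfg{ov::intel_gpu::allow_new_shape_infer(true)};
// New style: engine-aware defaults first, per-test overrides after.
ExecutionConfig cfg = get_test_default_config(engine);
cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));

The set_property form also lets later hunks add force_implementations or custom_outputs one property at a time instead of rebuilding the whole constructor argument list.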
config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1449,7 +1450,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1519,7 +1520,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1587,7 +1588,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 1; @@ -1640,7 +1641,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1867,7 +1868,7 @@ TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) { // Sample Type: Linear auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 1; @@ -2027,7 +2028,7 @@ struct resample_opt_random_test : testing::TestWithParam{"resample"})); network net(engine, topo, config); @@ -2046,7 +2047,7 @@ struct resample_opt_random_test : testing::TestWithParam{"resample_opt", "res_to_bfyx"})); cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test); @@ -2138,10 +2139,10 @@ struct resample_opt_random_test_ext : resample_opt_random_test topo_opt.add(prim_opt); topo_opt.add(reorder("res_to_bfyx", input_info("resample_opt"), origin_format, params.input_type)); - ExecutionConfig cfg{ov::enable_profiling(true), - ov::intel_gpu::custom_outputs(std::vector{"res_to_bfyx"}), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {working_format, kernel}} }) - }; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::enable_profiling(true)); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{"res_to_bfyx"})); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {working_format, kernel}} })); network net_opt(engine, topo_opt, cfg); diff --git a/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp index 9825d0492a8759..acb659a11a3dcf 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp @@ -66,7 +66,7 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re } tpl.add(reshape("reshape", reshape_input, reshape_size, cldnn::reshape::reshape_mode::base, output_padd)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{reshape_input, 
"reshape"})); cldnn::network::ptr net = get_network(engine, tpl, config, get_test_stream_ptr(), is_caching_test); @@ -459,7 +459,7 @@ void test_multiple_users_with_reorder(bool is_caching_test) { std::vector out2 = {0.f, 2.f, 0.f, 4.0f}; set_values(input, input_vec); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -502,7 +502,7 @@ void test_calc_output_shape(bool is_caching_test) { set_values(input, {-1.f, 2.f, -3.f, 4.f}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -574,7 +574,7 @@ void test_basic_bfwzyx(bool is_caching_test) { set_values(input, input_data); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -630,7 +630,7 @@ void test_shrink_chain_partial(bool is_caching_test) { std::vector out = {5.f, 12.f, 15.f, 32.0f}; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -675,7 +675,7 @@ void test_shrink_chain_full(bool is_caching_test) { std::vector out = {5.f, 12.f, 15.f, 32.0f}; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -715,7 +715,7 @@ void test_shrink_chain_out(bool is_caching_test) { std::vector out = {0.f, 2.f, 0.f, 4.0f}; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -758,7 +758,7 @@ TEST(reshape_gpu_f32, basic_runtime_static_shape) { set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -806,7 +806,7 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape) { set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -857,7 +857,7 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const) { set_values(input, input_data); - 
ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -914,7 +914,7 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const_optimized_out) { set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp index 497366d046c9eb..cb512fcd839463 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp @@ -76,7 +76,7 @@ struct reverse_gpu_test : public ::testing::TestWithParam tp.add(reverse(reverse_id, input_info(reverse_input_id), input_info(axes_id), mode)); } - cldnn::network::ptr network = get_network(engine, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, tp, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(reverse_input_id, reverse_input); network->set_input_data(axes_id, reverse_axes); auto result = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp index e689368a6f51e7..d3491b814dd864 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp @@ -36,7 +36,7 @@ void test_fp32_d2_2_ba1_sa0(bool is_caching_test) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("seq_lengths", seq_lengths); @@ -85,7 +85,7 @@ void test_fp32_d3_3_3_ba0_sa1(bool is_caching_test) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("seq_lengths", seq_lengths); @@ -135,7 +135,7 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -181,7 +181,7 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -228,7 +228,7 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -275,7 +275,7 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -320,7 +320,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -362,7 +362,7 @@ TEST(reverese_sequence_gpu_test, fp16x2_d2_2_ba1_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -406,7 +406,7 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -452,7 +452,7 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -498,7 +498,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -545,7 +545,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -592,7 +592,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
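The reverse_sequence hunks above, like most of this patch, apply one mechanical substitution. Condensed to its essence (identifiers exactly as they appear in the hunks; this is an illustrative summary, not a hunk itself):

    // Before: each test silently relied on a default-constructed config.
    network network(engine, topology);

    // After: the shared test baseline is passed explicitly, so every test
    // runs under the same ExecutionConfig produced by the test utilities.
    network network(engine, topology, get_test_default_config(engine));
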
network.set_input_data("seq_lengths", seq_lengths); diff --git a/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp index c0682d49ebfb8a..5afbe017c9d35f 100644 --- a/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp @@ -69,6 +69,7 @@ struct roi_align_test : public testing::Test { roi_align::AlignedMode aligned_mode, bool is_caching_test) const { auto& engine = get_test_engine(); + auto stream = get_test_stream_ptr(get_test_default_config(engine)); auto input = get_memory(engine, input_lt, input_data); auto coords = get_memory(engine, coords_lt, coords_data); @@ -91,7 +92,7 @@ struct roi_align_test : public testing::Test { aligned_mode)); topology.add(reorder("out", input_info("roi_align"), plain_format, device_data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), stream, is_caching_test); network->set_input_data("input", input); network->set_input_data("coords", coords); diff --git a/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp index 282fcf99e5d7ce..1d9e6098e2c67d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp @@ -185,7 +185,7 @@ struct roi_pooling_gpu_test : public testing::TestWithParam::value)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); for (auto& input : inputs) { network->set_input_data(input.first, input.second); diff --git a/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp index e8a33f30615be1..515a0a2ef3d3a0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp @@ -54,7 +54,7 @@ struct roll_test : testing::TestWithParam> { topology.add(roll("roll", input_info("reordered_input"), tensor(input_format, p.shift))); topology.add(reorder("reordered_roll", input_info("roll"), plane_format, type_to_data_type::value)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp index 1ca64dd52552e2..961d10d5190cb0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp @@ -71,7 +71,7 @@ void test_d2411_axisF(bool is_caching_test) { scatter_elements_update("scatter_elements_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), 
is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -296,7 +296,7 @@ struct scatter_elements_update_gpu_formats_test ); topology.add(reorder("ScatterEelementsUpdatePlain", input_info("ScatterEelementsUpdate"), plain_format, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Data", data); network->set_input_data("Indices", indices); diff --git a/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp index 7bcba7be9c08d7..89d418109ba1a1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp @@ -146,7 +146,7 @@ struct scatter_nd_update_random_test : testing::TestWithParamset_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -216,7 +216,7 @@ struct scatter_nd_update_random_test : testing::TestWithParamset_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -570,7 +570,7 @@ TEST(scatter_nd_update_gpu_fp16_test15, data5_indice3_update5) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 3) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -654,7 +654,7 @@ TEST(scatter_nd_update_gpu_fp16_test14, data5_indice2_update3) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -718,7 +718,7 @@ TEST(scatter_nd_update_gpu_fp16_test13, data4_indice2_update2) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -789,7 +789,7 @@ TEST(scatter_nd_update_gpu_fp16_test12, data3_indice3_update1) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -919,7 +919,7 @@ TEST(scatter_nd_update_gpu_fp16_test11, data6_indice1_update6) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1015,7 +1015,7 @@ TEST(scatter_nd_update_gpu_fp16_test10, data5_indice1_update5) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1093,7 +1093,7 @@ TEST(scatter_nd_update_gpu_fp16_test9, data4_indice1_update4) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1191,7 +1191,7 @@ TEST(scatter_nd_update_gpu_fp16_test8, data6_indice2_update5) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1259,7 +1259,7 @@ TEST(scatter_nd_update_gpu_fp16_test7, data5_indice2_update4) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1325,7 +1325,7 @@ TEST(scatter_nd_update_gpu_fp16_test6, data4_indice2_update3) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1390,7 +1390,7 @@ TEST(scatter_nd_update_gpu_fp16_test5, data3_indice2_update2) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1445,7 +1445,7 @@ TEST(scatter_nd_update_gpu_fp16_test4, data2_indice2_update1) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1520,7 +1520,7 @@ TEST(scatter_nd_update_gpu_fp16_test3, data3_indice1_update3) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1575,7 +1575,7 @@ TEST(scatter_nd_update_gpu_fp16_test2, data2_indice1_update2) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1624,7 +1624,7 @@ TEST(scatter_nd_update_gpu_fp16_test1, data1_indice1_update1) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1719,7 +1719,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2311) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 
2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1858,7 +1858,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2211) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2008,7 +2008,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2129,7 +2129,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2411) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2232,7 +2232,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2311) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2341,7 +2341,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2211) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2458,7 +2458,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2592,7 +2592,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i25111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2760,7 +2760,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i24111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2931,7 +2931,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i23111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3114,7 +3114,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i22111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3315,7 +3315,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i21111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3475,7 +3475,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i261111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3628,7 +3628,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i251111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3784,7 +3784,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i241111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3947,7 +3947,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i231111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -4121,7 +4121,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i221111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -4319,7 +4319,7 @@ void test_d222222_i211111(bool is_caching_test) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -4431,7 +4431,7 @@ TEST(scatter_nd_update_gpu, dynamic) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp index fedc3f54996785..1eb3c62d09ccef 100644 --- a/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp @@ -95,7 +95,7 @@ void test_d2411_axisB(bool is_caching_test) { ); 
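Where a test needs non-default behaviour, the patch keeps the same shape: start from the shared baseline, then layer per-test properties on top instead of starting from an empty ExecutionConfig. The dynamic scatter_nd_update test above reduces to the following (all calls taken verbatim from the hunk):

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); // enable the dynamic-shape path
    network network(engine, topology, config);
    network.set_input_data("InputData", input1);
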
topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputDictionary", input1); network->set_input_data("InputText", input2); @@ -176,7 +176,7 @@ TEST(scatter_update_gpu_fp32, d8111_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); @@ -270,7 +270,7 @@ TEST(scatter_update_gpu_fp16, d4311_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -397,7 +397,7 @@ TEST(scatter_update_gpu_fp16, d2521_axisF) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -510,7 +510,7 @@ TEST(scatter_update_gpu_fp16, d2241_axisY) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -671,7 +671,7 @@ TEST(scatter_update_gpu_fp16, d8x2x20x1_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -797,7 +797,7 @@ TEST(scatter_update_gpu_fp32, d2214_axisX) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -899,7 +899,7 @@ TEST(scatter_update_gpu_int32, d6211_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -998,7 +998,7 @@ TEST(scatter_update_gpu_int32, d3151_axisY) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1082,7 +1082,7 @@ TEST(scatter_update_gpu_fp32, d24111_axisF_bfzyx) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1188,7 +1188,7 @@ TEST(scatter_update_gpu_int32, d121251_bfwzyx_axisB) { scatter_update("scatter_update", input_info("InputDictionary"), input_info("TextReordered"), input_info("InputUpdates"), axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1279,7 +1279,7 @@ TEST(scatter_update_gpu_fp32, d21511_bfzyx_axisX) { ); topology.add(reorder("out", input_info("scatter_update"), plain_3d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); @@ -1385,7 +1385,7 @@ TEST(scatter_update_gpu_fp32, d1252_axisY_bfwzyx) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1475,7 +1475,7 @@ TEST(scatter_update_gpu_int32, d2115_axisX_bfwzyx) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1569,7 +1569,7 @@ void test_d21214_bfzyx_axisX_bfwzyx(bool is_caching_test) { ); topology.add(reorder("out", input_info("scatter_update"), plain_3d_format, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputDictionary", input1); network->set_input_data("InputText", input2); @@ -1656,7 +1656,7 @@ TEST(scatter_update_gpu_fp32, dynamic) { ); topology.add(reorder("out", input_info("scatter_update"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp index fab61b847f7889..ec67d67a268ffd 100644 --- a/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp @@ -45,7 +45,7 @@ void test_select_basic(bool is_caching_test) { 0.f, 1.f, 0.f, 1.f, 1.f, 0.f, 1.f, 0.f }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -103,7 +103,7 @@ TEST(select_gpu_f32, select_basic_negative) { -0.f, -1.f, -0.f, -1.f, -1.f, -0.f, -1.f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
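Tests parameterised by is_caching_test do not construct network directly; they go through the shared get_network helper, which lets the same test body run with and without the caching path. The config argument changes in exactly the same way (signature as used throughout this patch):

    cldnn::network::ptr network = get_network(engine,
                                              topology,
                                              get_test_default_config(engine),
                                              get_test_stream_ptr(),
                                              is_caching_test);
    network->set_input_data("input", input);
    auto outputs = network->execute();
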
network.set_input_data("input2", input2); @@ -180,7 +180,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x2x1x2) { 0.f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -256,7 +256,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_1x1x1x1) { 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -336,7 +336,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x2x2x1) { -0.f, -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -422,7 +422,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_2x2x1x2) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -504,7 +504,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x2x1_bcast_in2_2x2x1 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -578,7 +578,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x1x2x2_in1_1x2x2x2_in 0.f, 1.f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -654,7 +654,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -730,7 +730,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_1x1x1x1) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -810,7 +810,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in2_2x2x2x1) { -1.5f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -896,7 +896,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x1x2) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -972,7 +972,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_1x1x1x1) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -1052,7 +1052,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in1_2x2x2x1) { -1.5f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", 
input1); network.set_input_data("input2", input2); @@ -1132,7 +1132,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -1195,7 +1195,7 @@ TEST(select_gpu_f32, select_basic_comma) { -0.f, -0.1f, -0.f, -0.5f, -0.7f, -0.f, -1.5f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1230,7 +1230,7 @@ TEST(select_gpu_f32, select_basic_error_input_sizes) { topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", input_info("mask"), input_info("input"), input_info("input2"))); - EXPECT_ANY_THROW(network(engine, topology)); + EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine))); } TEST(select_gpu_f32, select_basic_error_mask_sizes) { @@ -1246,7 +1246,7 @@ TEST(select_gpu_f32, select_basic_error_mask_sizes) { topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", input_info("mask"), input_info("input"), input_info("input2"))); - EXPECT_ANY_THROW(network(engine, topology)); + EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine))); } TEST(select_gpu_f32, select_basic_error_input_types) { @@ -1261,7 +1261,7 @@ TEST(select_gpu_f32, select_basic_error_input_types) { topology.add(input_layout("input2", input2->get_layout())); topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", input_info("mask"), input_info("input"), input_info("input2"))); - EXPECT_ANY_THROW(network(engine, topology)); + EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine))); } TEST(select_gpu_f32, select_basic_byxf) { @@ -1296,7 +1296,7 @@ TEST(select_gpu_f32, select_basic_byxf) { 0.f, 1.f, 0.f, 1.f, 1.f, 0.f, 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1350,7 +1350,7 @@ TEST(select_gpu_f32, select_basic_mask_f16) { 0, 1, 0, 1, 1, 0, 1, 0 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1404,7 +1404,7 @@ TEST(select_gpu_f32, select_basic_mask_i8) { 0, 1, 0, 1, 1, 0, 1, 0 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1458,7 +1458,7 @@ TEST(select_gpu_f32, select_basic_mask_u8) { 0, 211, 0, 255, 199, 0, 160, 0 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1505,7 +1505,7 @@ TEST(select_gpu_f32, select_basic_1x1x2x2) { 0.f, 0.f, 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1554,7 +1554,7 @@ TEST(select_gpu_f32, select_basic_bfyx_1x1x2x2) { 1.f, 1.f }); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1604,7 +1604,7 @@ TEST(select_gpu_f32, select_basic_byxf_1x1x2x2) { 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1656,7 +1656,7 @@ void test_f16_select_basic_1x1x2x2(bool is_caching_test) { 1, 1 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -1710,7 +1710,7 @@ TEST(select_gpu_f16, select_basic_mask_f32_1x1x2x2) { 1.5f, 0.4f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1760,7 +1760,7 @@ TEST(select_gpu_f16, select_basic_mask_i8_1x1x2x2) { 1, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1810,7 +1810,7 @@ TEST(select_gpu_f16, select_basic_mask_u8_1x1x2x2) { 128, 255 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1862,7 +1862,7 @@ void test_i8_select_basic_1x1x2x2(bool is_caching_test) { 3, 5 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -1916,7 +1916,7 @@ TEST(select_gpu_i8, select_basic_mask_f32_1x1x2x2) { 1.5f, 0.4f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1966,7 +1966,7 @@ TEST(select_gpu_i8, select_basic_mask_f16_1x1x2x2) { 3, 5 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2016,7 +2016,7 @@ TEST(select_gpu_i8, select_basic_mask_u8_1x1x2x2) { 128, 255 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2068,7 +2068,7 @@ void test_u8_select_basic_1x1x2x2(bool is_caching_test) { 128, 255 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -2122,7 +2122,7 @@ TEST(select_gpu_u8, select_basic_mask_f32_1x1x2x2) { 1.5f, 0.4f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
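The negative tests in select_gpu_test get the same treatment: even where the test only asserts that network construction throws, the shared config is now passed, keeping construction uniform across positive and negative tests (verbatim from the select_basic_error hunks above):

    EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine)));
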
network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2172,7 +2172,7 @@ TEST(select_gpu_u8, select_basic_mask_f16_1x1x2x2) { 1, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2222,7 +2222,7 @@ TEST(select_gpu_u8, select_basic_mask_i8_1x1x2x2) { 1, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2269,7 +2269,7 @@ TEST(select_gpu_fp32, select_numpy_broadcast_mask_u8_1x1x3) { 1, 0, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2332,7 +2332,7 @@ TEST(select_gpu_f32, select_different_formats) { 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -2419,7 +2419,7 @@ TEST(select_gpu_f32, dynamic) { topology.add(input_layout("mask", mask_layout)); topology.add(cldnn::select("select", input_info("mask"), input_info("input1"), input_info("input2"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp index 2e6c9f55bc7561..a1adf941f132f0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp @@ -46,7 +46,7 @@ void test_basic(bool is_caching_test) { reorder("reorder", input_info("Input"), input_data->get_layout()) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input_data); network->set_output_memory("reorder", output_mem); @@ -94,7 +94,7 @@ TEST(set_output_memory_gpu, basic_const) { reorder("reorder_const", input_info("Const"), input_data->get_layout()) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input_data); network.set_output_memory("reorder_dyn", output_mem); @@ -143,7 +143,7 @@ TEST(set_output_memory_gpu, basic_mutable) { reorder("reorder_mutable", input_info("Mutable"), input_data->get_layout()) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input_data); network.set_output_memory("reorder_dyn", output_mem); @@ -196,7 +196,7 @@ TEST(set_output_memory_gpu, top_k1) { }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_output_memory("reorder", output_mem); @@ -242,7 +242,7 @@ TEST(set_output_memory_gpu, top_k2) { }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_output_memory("reorder", second_output_mem); @@ -322,7 +322,7 @@ TEST(set_output_memory_gpu, basic_opt) { primitive_id outputID = "reorder3"; topology.add(reorder(outputID, input_info("concat"), ol)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -374,7 +374,7 @@ TEST(set_output_memory_gpu, mutable_output_data) { /*b1f3*/4.f, 0.5f, 8.f, 8.2f }; set_values(input, input_vec); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network network(prog, 0); network.set_input_data("Add_1396", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp index 2cbbb93a890e46..c041372e0d7ab7 100644 --- a/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp @@ -25,7 +25,7 @@ TEST(shape_of_gpu, bfyx) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -50,7 +50,7 @@ TEST(shape_of_gpu, bfyx_i64) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i64)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -75,7 +75,7 @@ TEST(shape_of_gpu, yxfb) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -100,7 +100,7 @@ TEST(shape_of_gpu, bfzyx) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 5, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -129,7 +129,7 @@ TEST(shape_of_gpu, dynamic) { topology.add(input_layout("input", in_layout)); topology.add(shape_of("shape_of", input_info("input"), 5, data_types::i32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp b/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp index f46864d0a8cd1a..7614bf9ae92bae 100644 --- a/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp @@ -35,7 +35,7 @@ void test_d1_15_2_2_ax1_g5(bool is_caching_test) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), 
get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input0); @@ -81,7 +81,7 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -123,7 +123,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_ax0_g5) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -165,7 +165,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_axm4_g5) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -204,7 +204,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g3) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -242,7 +242,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g3) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -280,7 +280,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g2) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -318,7 +318,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g2) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -354,7 +354,7 @@ TEST(shuffle_channels_fp32_gpu, d6_axm0_g2) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); diff --git a/src/plugins/intel_gpu/tests/test_cases/slice.cpp b/src/plugins/intel_gpu/tests/test_cases/slice.cpp index 9e06a840b5e645..3dccacfc01637b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/slice.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/slice.cpp @@ -45,7 +45,7 @@ class SliceTest : public ::testing::Test { } topology.add(slice("slice", inputs, tensor{output_shape_})); - cldnn::network::ptr network = get_network(engine_, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine_, topology, get_test_default_config(engine_), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp index 35658f33ba37b1..e45969be2f041d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp @@ -73,7 +73,7 @@ class softmax_gpu_xb_f32_test_fixture: public ::testing::Test { topology.add(input_layout("input", 
input->get_layout())); topology.add(softmax("softmax", input_info("input"), 3)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -108,7 +108,7 @@ class softmax_gpu_xb_f32_test_fixture: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", input_info("input"), 3)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -165,7 +165,7 @@ class softmax_gpu_xb_f32_test_fixture: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", input_info("input"), 3)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -238,7 +238,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_y) { 0.993307149f //b=1, f=2, x=1 }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -318,7 +318,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_f) { 0.977054322f //b=1, y=1, x=1 }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -403,7 +403,7 @@ TEST(softmax_gpu_bfzyx_f32, normalize_z) { 0.880797f, 0.952574f, }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -486,7 +486,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_b) { 0.977054322f //f=1, y=1, x=1 }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -946,9 +946,9 @@ struct softmax_gpu_formats_test set_values(input, params.input); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(false)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto outputs = network->execute(); @@ -1048,7 +1048,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_f_dynamic) { 0.977054322f //b=1, y=1, x=1 }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1153,7 +1153,7 @@ TEST(softmax_gpu_bfyx_f32, bf_opt_normalize_f_dynamic) { 0.719294981f //b=1, y=0, x=0 }; - ExecutionConfig config; +
ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp index 0b6580c2d0bf20..ddec5b3c84a4c3 100644 --- a/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp @@ -40,7 +40,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {8,1,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -84,7 +84,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,2,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {4,1,3,2}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -130,7 +130,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,0}, 0), tensor(format::bfyx, {16,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -176,7 +176,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfzyx, {0,0,0,1,0}, 0), tensor(format::bfzyx, {0,0,0,0,0}, 0), tensor(format::bfzyx, {8,1,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -224,7 +224,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfwzyx, {0,1,0,1,0,0}, 0), tensor(format::bfwzyx, {0,0,0,0,0,0}, 0), tensor(format::bfwzyx, {16,1,2,2,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -277,7 +277,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,1}, 0), tensor(format::bfyx, {8,8,1,1}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -330,7 +330,7 @@ class 
space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,2,0,0}, 0), tensor(format::bfyx, {4,5,1,2}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -406,7 +406,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {8,1,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -450,7 +450,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,2,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {4,1,3,2}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -496,7 +496,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,0}, 0), tensor(format::bfyx, {16,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -542,7 +542,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfzyx, {0,0,0,1,0}, 0), tensor(format::bfzyx, {0,0,0,0,0}, 0), tensor(format::bfzyx, {8,1,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -588,7 +588,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfwzyx, {0,1,0,1,0,0}, 0), tensor(format::bfwzyx, {0,0,0,0,0,0}, 0), tensor(format::bfwzyx, {16,1,2,2,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -645,7 +645,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {8,4,1,2}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -699,7 +699,7 @@ class 
space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {6,2,2,2}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp index c8a8aed7b0e4f7..ad4c415829b324 100644 --- a/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp @@ -36,7 +36,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -78,7 +78,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -126,7 +126,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -188,7 +188,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -236,7 +236,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -278,7 +278,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + 
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -326,7 +326,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -388,7 +388,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -438,7 +438,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -477,7 +477,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -525,7 +525,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -579,7 +579,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -626,7 +626,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -665,7 +665,7 @@ class 
space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -713,7 +713,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -767,7 +767,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -822,7 +822,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { topology.add(space_to_depth("space_to_depth", input_info("reorder"), space_to_depth::depth_first, block_size)); topology.add(reorder("reorder_out", input_info("space_to_depth"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -877,7 +877,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { topology.add(space_to_depth("space_to_depth", input_info("reorder"), space_to_depth::depth_first, block_size)); topology.add(reorder("reorder_out", input_info("space_to_depth"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp index b91d8be7fc27fc..1963622528e88c 100644 --- a/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp @@ -38,7 +38,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 3)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -93,7 +93,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { 
tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 2)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -150,7 +150,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 2, padding({ 0, 0, 1, 1 }, 0.0f))); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -205,7 +205,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 3, padding({ 0, 0, 2, 0 }, { 0, 0, 0, 0 }))); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -265,7 +265,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2"), input_info("in3") }, 3)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); net->set_input_data("in3", input3); @@ -353,7 +353,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2"), input_info("in3") }, 0)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); net->set_input_data("in3", input3); @@ -413,7 +413,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 4)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -477,7 +477,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 3)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + 
cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -541,7 +541,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 2)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -616,7 +616,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 0)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -746,7 +746,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2"), input_info("in3") }, 0)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); net->set_input_data("in3", input3); diff --git a/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp index feffb2181af190..186f4e05a6a50a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp @@ -74,7 +74,7 @@ void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vec std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -225,7 +225,7 @@ TEST(split_gpu_f32, basic_split_concat_optimization) { topology.add(concatenation("concat", inputs, 1)); topology.add(reorder("output", input_info("concat"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -265,7 +265,7 @@ TEST(split_gpu_i64, basic_split_concat_optimization) { topology.add(concatenation("concat", inputs, 1)); topology.add(reorder("output", input_info("concat"), format::bfyx, data_types::i64)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -540,7 +540,7 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_feature_bfyx) { 
std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -586,7 +586,7 @@ TEST(split_gpu_i64, basic_in2x3x2x2_split_feature_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -649,7 +649,7 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_scale_feature_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("scale_input0", scale_input0); diff --git a/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp b/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp index 77b52aad26478d..ba179242f2aa1d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp @@ -32,7 +32,7 @@ class gpu_streams: public ::testing::Test { input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, activation_additional_params{ 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -82,7 +82,7 @@ class gpu_streams: public ::testing::Test { membuf mem_buf0; membuf mem_buf1; { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); { network0 = std::make_shared(prog, 0); std::ostream out_mem0(&mem_buf0); @@ -109,7 +109,7 @@ class gpu_streams: public ::testing::Test { } } } else { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network0 = std::make_shared(prog, 0); network1 = std::make_shared(prog, 1); } @@ -185,7 +185,7 @@ class gpu_streams: public ::testing::Test { membuf mem_buf0; membuf mem_buf1; { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); { network0 = std::make_shared(prog, 0); std::ostream out_mem0(&mem_buf0); @@ -212,7 +212,7 @@ class gpu_streams: public ::testing::Test { } } } else { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network0 = std::make_shared(prog, 0); network1 = std::make_shared(prog, 1); } diff --git a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp index 974af981e28ae4..62aafb16ae9b7e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp +++ 
b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp @@ -36,7 +36,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -81,7 +81,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -128,7 +128,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -176,7 +176,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -240,7 +240,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -292,7 +292,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1 }, {}, {}, {2, 2, 4, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -335,7 +335,7 @@ class strided_slice_gpu: public 
::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -376,7 +376,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -417,7 +417,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -458,7 +458,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -503,7 +503,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -544,7 +544,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -590,7 +590,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), 
input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {})); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -640,7 +640,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {})); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -759,7 +759,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -816,7 +816,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -875,7 +875,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -935,7 +935,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1011,7 +1011,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(input_layout("input4", strides->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + 
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", begin); @@ -1078,7 +1078,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1133,7 +1133,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1187,7 +1187,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1240,7 +1240,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 2, 2, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1293,7 +1293,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1350,7 +1350,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), 
get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1401,7 +1401,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin, end, strides, {}, {}, {}, {}, {}, {1, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1452,7 +1452,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin, end, strides, {}, {}, {}, {}, {}, {1, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1505,7 +1505,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1563,7 +1563,7 @@ class strided_slice_gpu_four_inputs: public ::testing::Test { topology.add(input_layout("input4", strides->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", begin); @@ -1621,7 +1621,7 @@ class strided_slice_gpu_four_inputs: public ::testing::Test { topology.add(input_layout("input4", strides->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", begin); @@ -1668,7 +1668,7 @@ class strided_slice_gpu_i8: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1})); - cldnn::network::ptr network = 
get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1723,7 +1723,7 @@ class strided_slice_gpu_f32_i32: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {0, 1, 1, 0, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {1, 1, 1, 8, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp b/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp index 1f32f6d3c253b9..bca2df35496d04 100644 --- a/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp @@ -14,8 +14,14 @@ using namespace tests; class test_device_mem_usage_estimation: public ::testing::Test { public: void test_basic(bool is_caching_test) { - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(get_test_engine()); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + std::shared_ptr engine1 = create_test_engine(); + if (engine1->get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } auto input1 = engine1->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); auto input2 = engine1->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); diff --git a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp index 826c7f0a4ee025..e4d70dd8f81f9e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp @@ -72,7 +72,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 0, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -104,7 +104,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 1, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -140,7 +140,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 2, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = 
get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -172,7 +172,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 3, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -200,7 +200,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 3, 4); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -237,7 +237,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 2, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -292,7 +292,7 @@ TEST_F(tile_gpu, dynamic) { topology.add(input_layout("input", input_dyn_layout)); topology.add(tile("tile", input_info("input"), std::vector{ 1, 2, 1, 1 })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -672,7 +672,7 @@ struct tile_test result_id = reorder_result_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_data_id, input); diff --git a/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp index 2efdc4efaf646a..064f86c911e26f 100644 --- a/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp @@ -26,7 +26,7 @@ class trim_to_outputs: public ::testing::Test { */ void test_one_node_to_eliminate_case1(bool is_caching_test) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv1" })); config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders @@ -75,7 +75,7 @@ class trim_to_outputs: public ::testing::Test { */ void test_one_node_to_eliminate_case2(bool is_caching_test) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv1" })); config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders @@ -132,7 +132,7 @@ class 
trim_to_outputs: public ::testing::Test { */ void test_two_nodes_to_eliminate_case1(bool is_caching_test) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "conv4" })); config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders diff --git a/src/plugins/intel_gpu/tests/test_cases/variable.cpp b/src/plugins/intel_gpu/tests/test_cases/variable.cpp index 1e27aa9045e729..bcc25a2c5ebd1a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/variable.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/variable.cpp @@ -35,7 +35,7 @@ struct variable_test : public ::testing::TestWithParam> { topology.add(eltwise{"sum", { input_info("input"), input_info("read_value") }, eltwise_mode::sum, {}, variable_layout.data_type}); topology.add(assign{"assign", { input_info("sum") }, "v0", variable_layout}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->assign_variables_memories({ { "v0", std::make_shared(engine.allocate_memory(variable_layout)) } }); network->set_input_data("input", input_data); @@ -123,7 +123,7 @@ void test_exception_on_wrong_layout(bool is_caching_test) { topology.add(input_layout("wrong_input", wrong_input_data->get_layout())); topology.add(assign{"assign", { input_info("wrong_input") }, "v0", wrong_layout}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->assign_variables_memories({ { "v0", std::make_shared(engine.allocate_memory(variable_layout)) } }); network->set_input_data("input", input_data); @@ -179,7 +179,7 @@ void test_variables_are_preserved_across_inferences(bool is_caching_test) { topology.add(data("dummy2", dummy2)); topology.add(read_value{"read_result", { input_info("dummy2") }, "v_result", variable_layout}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->assign_variables_memories({ { "v1", std::make_shared(engine.allocate_memory(variable_layout)) }, diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp index e71a2b32bc53e3..9751aafbb3f237 100644 --- a/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp @@ -286,10 +286,30 @@ std::vector> generic_test::generate_generic_test_params return all_generic_params; } +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine& engine) { + return get_test_default_config(engine, {}); +} + +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine& engine, ov::AnyMap::value_type values) { + return get_test_default_config(engine, {values}); +} + +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine& engine, + std::initializer_list<ov::AnyMap::value_type> values) { + ExecutionConfig config(values); + + // The oneDNN engine currently does NOT support out-of-order queues + if (engine.get_device_info().supports_immad) {
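+        // Note: supports_immad is read here as "oneDNN kernels may be selected for this
+        // device"; the in-order fallback below rests on that assumption.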
+ config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } + + return config; +} + std::shared_ptr<cldnn::engine> create_test_engine() { auto ret = cldnn::engine::create(engine_types::ocl, runtime_types::ocl); #ifdef ENABLE_ONEDNN_FOR_GPU - if(ret->get_device_info().supports_immad) + if (ret->get_device_info().supports_immad) ret->create_onednn_engine({}); #endif return ret; @@ -304,12 +324,15 @@ cldnn::engine& get_test_engine() { } cldnn::stream_ptr get_test_stream_ptr() { + // Create OOO queue for test purposes. If in-order queue is needed in a test, then it should be created there explicitly + auto cfg = get_test_default_config(get_test_engine()); + + return get_test_stream_ptr(cfg); +} + +cldnn::stream_ptr get_test_stream_ptr(cldnn::ExecutionConfig cfg) { static std::shared_ptr<cldnn::stream> test_stream = nullptr; - if (!test_stream) { - // Create OOO queue for test purposes. If in-order queue is needed in a test, then it should be created there explicitly - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); - test_stream = get_test_engine().create_stream(cfg); - } + test_stream = get_test_engine().create_stream(cfg); return test_stream; } diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/test_utils/test_utils.h index e21f259bc2f309..7fa5c5b0b11114 100644 --- a/src/plugins/intel_gpu/tests/test_utils/test_utils.h +++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -55,9 +56,17 @@ namespace tests { std::shared_ptr<cldnn::engine> create_test_engine(); cldnn::engine& get_test_engine(); +cldnn::stream_ptr get_test_stream_ptr(cldnn::ExecutionConfig cfg); cldnn::stream_ptr get_test_stream_ptr(); cldnn::stream& get_test_stream(); +// Set default configuration for test-cases +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine&); +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine&, ov::AnyMap::value_type values); +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine&, + std::initializer_list<ov::AnyMap::value_type> values); + + template bool has_node_with_type(cldnn::program& prog) { for (auto node : prog.get_processing_order()) { From 05e54e9f3dd43573e22e5c175ed7894841f30496 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Tue, 21 Mar 2023 18:56:41 +0900 Subject: [PATCH 011/296] [GPU] Update the latest onedNN3.1 (#16381) - Fix group conv regression issue Signed-off-by: hyunback --- src/plugins/intel_gpu/thirdparty/CMakeLists.txt | 1 + src/plugins/intel_gpu/thirdparty/onednn_gpu | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt index 60883cbac6b5ad..f54f0a1d80853d 100644 --- a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt @@ -91,6 +91,7 @@ if(ENABLE_ONEDNN_FOR_GPU) "-DDNNL_BUILD_EXAMPLES=OFF" "-DDNNL_BLAS_VENDOR=NONE" "-DDNNL_LIBRARY_TYPE=STATIC" + "-DONEDNN_BUILD_GRAPH=OFF" "-DOpenCL_LIBRARY=${OpenCL_LIBRARY}" "-DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIR}" ) diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index ad34c124895690..b52e9cd54df5af 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit ad34c124895690bafd2b110577639824899ecbca +Subproject commit b52e9cd54df5af92d1d586d435cdd514dd7617fe
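For illustration, a typical test body that picks up these defaults might look like the following minimal sketch. It assumes only the helpers declared in test_utils.h above; the primitive names and shapes are placeholders:

    void test_basic(bool is_caching_test) {
        auto& engine = get_test_engine();
        // Start from the shared defaults: on immad-capable (oneDNN) devices this
        // selects an in-order queue, elsewhere it leaves the configuration untouched.
        ExecutionConfig config = get_test_default_config(engine);
        // Per-test options are layered on top of the defaults, not on a bare ExecutionConfig().
        config.set_property(ov::intel_gpu::optimize_data(true));

        auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
        topology topology;
        topology.add(input_layout("input", input->get_layout()));
        topology.add(activation("relu", input_info("input"), activation_func::relu));

        cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
        network->set_input_data("input", input);
        auto outputs = network->execute();
    }

From 8926282ac57491c3e66ef693b938697d2b8d5e52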
Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Tue, 21 Mar 2023 10:57:48 +0100 Subject: [PATCH 012/296] DOCS shift to rst - `Multi device execution` article (#16400) --- docs/OV_Runtime_UG/multi_device.md | 104 +++++++++++++---------------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/docs/OV_Runtime_UG/multi_device.md b/docs/OV_Runtime_UG/multi_device.md index 01d03ef100bd3f..e1b6492d59e5a2 100644 --- a/docs/OV_Runtime_UG/multi_device.md +++ b/docs/OV_Runtime_UG/multi_device.md @@ -7,13 +7,13 @@ To run inference on multiple devices, you can choose either of the following way - Use the :ref:`CUMULATIVE_THROUGHPUT option ` of the Automatic Device Selection mode. This way, you can use all available devices in the system without the need to specify them. - Use the Multi-Device execution mode. This page will explain how it works and how to use it. -@endsphinxdirective - -## How MULTI Works +How MULTI Works +#################### The Multi-Device execution mode, or MULTI for short, acts as a "virtual" or a "proxy" device, which does not bind to a specific type of hardware. Instead, it assigns available computing devices to particular inference requests, which are then executed in parallel. The potential gains from using Multi-Device execution are: + * improved throughput from using multiple devices at once, * increase in performance stability due to multiple devices sharing inference workload. @@ -22,31 +22,29 @@ Importantly, the Multi-Device mode does not change the application logic, so it Note that the performance increase in this mode comes from utilizing multiple devices at once. This means that you need to provide the devices with enough inference requests to keep them busy, otherwise you will not benefit much from using MULTI. -## Using the Multi-Device Mode +Using the Multi-Device Mode +########################### Following the OpenVINO™ naming convention, the Multi-Device mode is assigned the label of “MULTI.” The only configuration option available for it is a prioritized list of devices to use: -@sphinxdirective -+---------------------------+---------------------------------+------------------------------------------------------------+ -| Property | Property values | Description | -+===========================+=================================+============================================================+ -| | | MULTI: | | Specifies the devices available for selection. | -| | | comma-separated, no spaces | | The device sequence will be taken as priority | -+---------------------------+---------------------------------+ | from high to low. | -| ov::device::priorities | | device names | | Priorities can be set directly as a string. | -| | | comma-separated, no spaces | | -+---------------------------+---------------------------------+------------------------------------------------------------+ ++----------------------------+---------------------------------+------------------------------------------------------------+ +| Property | Property values | Description | ++============================+=================================+============================================================+ +| | | MULTI: | | Specifies the devices available for selection. | +| | | comma-separated, no spaces | | The device sequence will be taken as priority | ++----------------------------+---------------------------------+ | from high to low. | +| ``ov::device::priorities`` | | device names | | Priorities can be set directly as a string. 
| +| | | comma-separated, no spaces | | ++----------------------------+---------------------------------+------------------------------------------------------------+ -@endsphinxdirective Specifying the device list explicitly is required by MULTI, as it defines the devices available for inference and sets their priorities. Importantly, the list may also specify the number of requests for MULTI to keep for each device, as described below. -Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. More details on enumerating devices can be found in [Working with devices](supported_plugins/Device_Plugins.md). +Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. More details on enumerating devices can be found in :doc:`Working with devices `. The following commands are accepted by the API: -@sphinxdirective .. tab:: C++ @@ -60,11 +58,9 @@ The following commands are accepted by the API: :language: python :fragment: [MULTI_0] -@endsphinxdirective Notice that MULTI allows you to **change device priorities on the fly**. You can alter the order, exclude a device, and bring an excluded device back. Still, it does not allow adding new devices. -@sphinxdirective .. tab:: C++ @@ -78,19 +74,17 @@ Notice that MULTI allows you to **change device priorities on the fly**. You can :language: python :fragment: [MULTI_1] -@endsphinxdirective - +One more thing you can define is the **number of requests to allocate for each device**. You can do it simply by adding the number to each device in parentheses, like this: ``"MULTI:CPU(2),GPU(2)"``. However, this method is not recommended as it is not performance-portable. The suggested approach is to configure individual devices and query the resulting number of requests to be used at the application level, as described in `Configuring Individual Devices and Creating MULTI On Top <#configuring-individual-devices-and-creating-the-multi-device-on-top>`__. -One more thing you can define is the **number of requests to allocate for each device**. You can do it simply by adding the number to each device in parentheses, like this: `"MULTI:CPU(2),GPU(2)"`. However, this method is not recommended as it is not performance-portable. The suggested approach is to configure individual devices and query the resulting number of requests to be used at the application level, as described in [Configuring Individual Devices and Creating MULTI On Top](#config-multi-on-top). +To check what devices are present in the system, you can use the Device API. For information on how to do it, check :doc:`Query device properties and configuration `. -To check what devices are present in the system, you can use the Device API. For information on how to do it, check [Query device properties and configuration](supported_plugins/config_properties.md). +Configuring Individual Devices and Creating the Multi-Device On Top ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -### Configuring Individual Devices and Creating the Multi-Device On Top As mentioned previously, executing inference with MULTI may be set up by configuring individual devices before creating the "MULTI" device on top. It may be considered for performance reasons. -@sphinxdirective .. 
tab:: C++ @@ -104,17 +98,15 @@ As mentioned previously, executing inference with MULTI may be set up by configu :language: python :fragment: [MULTI_4] -@endsphinxdirective Alternatively, you can combine all the individual device settings into a single config file and load it for MULTI to parse. See the code example in the next section. +Querying the Optimal Number of Inference Requests ++++++++++++++++++++++++++++++++++++++++++++++++++ - -### Querying the Optimal Number of Inference Requests When using MULTI, you don't need to sum over included devices yourself, you can query the optimal number of requests directly, -using the [configure devices](supported_plugins/config_properties.md) property: +using the :doc:`configure devices ` property: -@sphinxdirective .. tab:: C++ @@ -122,56 +114,52 @@ using the [configure devices](supported_plugins/config_properties.md) property: :language: cpp :fragment: [part5] -@endsphinxdirective - - -## Using the Multi-Device with OpenVINO Samples and Benchmarking Performance +Using the Multi-Device with OpenVINO Samples and Benchmarking Performance +######################################################################### To see how the Multi-Device execution is used in practice and test its performance, take a look at OpenVINO's Benchmark Application which presents the optimal performance of the plugin without the need for additional settings, like the number of requests or CPU threads. Here is an example command to evaluate performance of CPU + GPU: -```sh -./benchmark_app –d MULTI:CPU,GPU –m -i -niter 1000 -``` +.. code-block:: sh + + ./benchmark_app –d MULTI:CPU,GPU –m -i -niter 1000 + + +For more information, refer to the :doc:`C++ ` or :doc:`Python ` version instructions. -For more information, refer to the [C++](../../samples/cpp/benchmark_app/README.md) or [Python](../../tools/benchmark_tool/README.md) version instructions. -@sphinxdirective .. note:: You can keep using the FP16 IR without converting it to FP32, even if some of the listed devices do not support it. The conversion will be done automatically for you. - No demos are yet fully optimized for MULTI, by means of supporting the ov::optimal_number_of_infer_requests property, using the GPU streams/throttling, and so on. -@endsphinxdirective + No demos are yet fully optimized for MULTI, by means of supporting the ``ov::optimal_number_of_infer_requests`` property, using the GPU streams/throttling, and so on. + +Performance Considerations for the Multi-Device Execution +######################################################### -## Performance Considerations for the Multi-Device Execution For best performance when using the MULTI execution mode you should consider a few recommendations: -- MULTI usually performs best when the fastest device is specified first in the device candidate list. -This is particularly important when the request-level parallelism is not sufficient -(e.g. the number of requests is not enough to saturate all devices). -- Just like with any throughput-oriented execution mode, it is highly recommended to query the optimal number of inference requests -directly from the instance of the `ov:compiled_model`. Refer to the code of the previously mentioned `benchmark_app` for more details. -- Execution on certain device combinations, for example CPU+GPU, performs better with certain knobs. Refer to the `benchmark_app` code for details. 
One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams to balance out slower -communication of inference completion from the device to the host. -- The MULTI logic always attempts to save on copying data between device-agnostic and user-facing inference requests, -and device-specific 'worker' requests that are being actually scheduled behind the scene. -To facilitate the copy savings, it is recommended to run the requests in the order in which they were created. + +- MULTI usually performs best when the fastest device is specified first in the device candidate list. This is particularly important when the request-level parallelism is not sufficient (e.g. the number of requests is not enough to saturate all devices). +- Just like with any throughput-oriented execution mode, it is highly recommended to query the optimal number of inference requests directly from the instance of the ``ov::compiled_model``. Refer to the code of the previously mentioned ``benchmark_app`` for more details. +- Execution on certain device combinations, for example CPU+GPU, performs better with certain knobs. Refer to the ``benchmark_app`` code for details. One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams to balance out slower communication of inference completion from the device to the host. +- The MULTI logic always attempts to save on copying data between device-agnostic and user-facing inference requests, and device-specific 'worker' requests that are actually scheduled behind the scenes. To facilitate the copy savings, it is recommended to run the requests in the order in which they were created. - While performance of accelerators combines well with MULTI, the CPU+GPU execution may introduce certain performance issues. It is due to the devices sharing some resources, like power or bandwidth. Enabling the GPU throttling hint, which saves a CPU thread for CPU inference, is an example of a recommended solution addressing this issue. +Additional Resources +#################### -## Additional Resources +- :doc:`Supported Devices ` +- :doc:`Automatic Device Selection ` -- [Supported Devices](supported_plugins/Supported_Devices.md) -- [Automatic Device Selection](./auto_device_selection.md) -@sphinxdirective .. raw:: html -@endsphinxdirective -> **NOTE**: This video is currently available only for C++, but many of the same concepts apply to Python. .. note:: This video is currently available only for C++, but many of the same concepts apply to Python. 
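+As a practical illustration of the recommendations above, a minimal C++ sketch could look as follows (the model path and the two-device priority list are placeholders, not a prescription):
+
+.. code-block:: cpp
+
+   #include <cstdint>
+   #include <vector>
+   #include <openvino/openvino.hpp>
+
+   int main() {
+       ov::Core core;
+       auto model = core.read_model("model.xml");
+       // Fastest device first, per the first recommendation above
+       auto compiled_model = core.compile_model(model, "MULTI",
+                                                ov::device::priorities("GPU", "CPU"));
+       // Query the optimal number of requests instead of hard-coding it
+       uint32_t nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests);
+       std::vector<ov::InferRequest> requests;
+       for (uint32_t i = 0; i < nireq; ++i)
+           requests.push_back(compiled_model.create_infer_request());
+       return 0;
+   }
+
+Creating exactly the reported number of requests keeps all listed devices saturated without oversubscribing the host.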
+ +@endsphinxdirective From 24ff43aa5b7b60cbd1a809965ef8f582b2016579 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 21 Mar 2023 14:16:07 +0400 Subject: [PATCH 013/296] Fixed comparison of iterators (#16428) --- src/core/tests/any.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/tests/any.cpp b/src/core/tests/any.cpp index 6b009cd15f3a6a..f66ae9720f0511 100644 --- a/src/core/tests/any.cpp +++ b/src/core/tests/any.cpp @@ -181,8 +181,8 @@ TEST_F(AnyTests, AnyAsMapOfMapOfAnys) { ASSERT_NE(testMap.find("refMap1"), testMap.end()); auto testMap1 = testMap.at("refMap1").as>(); - ASSERT_NE(testMap1.find("testParamInt"), testMap.end()); - ASSERT_NE(testMap1.find("testParamString"), testMap.end()); + ASSERT_NE(testMap1.find("testParamInt"), testMap1.end()); + ASSERT_NE(testMap1.find("testParamString"), testMap1.end()); int testInt1 = testMap1["testParamInt"].as(); std::string testString1 = testMap1["testParamString"].as(); @@ -192,8 +192,8 @@ TEST_F(AnyTests, AnyAsMapOfMapOfAnys) { ASSERT_NE(testMap.find("refMap2"), testMap.end()); auto testMap2 = testMap.at("refMap2").as>(); - ASSERT_NE(testMap2.find("testParamInt"), testMap.end()); - ASSERT_NE(testMap2.find("testParamString"), testMap.end()); + ASSERT_NE(testMap2.find("testParamInt"), testMap2.end()); + ASSERT_NE(testMap2.find("testParamString"), testMap2.end()); int testInt2 = testMap2["testParamInt"].as(); std::string testString2 = testMap2["testParamString"].as(); From d402b6ed3e1bf44f7a99695b47bd827016a5d954 Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Tue, 21 Mar 2023 10:53:01 +0000 Subject: [PATCH 014/296] [POT] Return Mul to ignored ops for transformers (except CPU_SPR) (#16407) --- .../tools/pot/algorithms/quantization/fake_quantize.py | 2 +- .../pot/openvino/tools/pot/algorithms/quantization/utils.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py b/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py index 2211f2531e8ae1..f931c1e906228c 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py @@ -178,7 +178,7 @@ def insert_fake_quantize_nodes(config, model, qscheme=None): ignored_params.update(deepcopy(config['ignored'])) if config['model_type']: - ignored_params['operations'] += get_ignored_operations(config['model_type']) + ignored_params['operations'] += get_ignored_operations(config['model_type'], config['target_device']) if qscheme: for key in qscheme: diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py index bdfc908c050cbb..9ad333a72bfcf5 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py @@ -321,9 +321,11 @@ def get_input_shape_for_bias(activations_statistics, input_node_name): return input_shape -def get_ignored_operations(model): +def get_ignored_operations(model_type, target_device): operation = {"transformer": [{"type": "Add"}, {"type": "Power"}, {"type": "Squeeze"}, {"type": "Subtract"}, {"type": "ReduceMean"}, {"type": "SquaredDifference"}, {"type": "MVN"}]} - return operation[model] + if target_device != 'CPU_SPR': + operation['transformer'].append({"type": "Multiply"}) + return operation[model_type] From 0893efe0732f77ae4aab03437b16fc305935d4bd Mon Sep 17 00:00:00 2001 From: Tomasz 
Jankowski Date: Tue, 21 Mar 2023 13:58:34 +0100 Subject: [PATCH 015/296] [Core] Assure TensorVector comparison uniqueness (#16232) * Assure TensorVector comparison uniqueness * Add test * Make the flow clear --- src/core/src/bound_evaluate.cpp | 33 ++++++++++++++++++++----------- src/core/tests/bound_evaluate.cpp | 28 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp index 930d71b80cefee..1aea44f9fa7ee2 100644 --- a/src/core/src/bound_evaluate.cpp +++ b/src/core/src/bound_evaluate.cpp @@ -179,14 +179,26 @@ ov::Tensor or_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { } struct TensorVectorCmp { + // Comparing Tensor vectors as numbers composed with pointers as digits. + // Indexed loop used to preserve order of comparison. bool operator()(const ov::TensorVector& lhs, const ov::TensorVector& rhs) const { - auto rhs_it = rhs.begin(); - return std::any_of(lhs.begin(), lhs.end(), [&rhs_it](const ov::Tensor& lhs) { - bool is_less = - (lhs && *rhs_it) ? lhs.data() < rhs_it->data() : static_cast(lhs) < static_cast(*rhs_it); - ++rhs_it; - return is_less; - }); + const auto lhs_size = lhs.size(); + const auto rhs_size = rhs.size(); + + if (lhs_size < rhs_size) + return true; + if (lhs_size > rhs_size) + return false; + + for (size_t i = 0; i < lhs_size; ++i) { + if (lhs[i].data() < rhs[i].data()) + return true; + if (lhs[i].data() > rhs[i].data()) + return false; + } + + // if all equals + return false; } }; @@ -281,17 +293,14 @@ bool ov::interval_bound_evaluator(const Node* node, auto low_1 = ov::evaluate_lower_bound(node->get_input_source_output(1)); auto up_0 = ov::evaluate_upper_bound(node->get_input_source_output(0)); auto up_1 = ov::evaluate_upper_bound(node->get_input_source_output(1)); + if (!low_0 || !low_1 || !up_0 || !up_1) + return false; std::set input_variants = {{low_0, low_1}, {low_0, up_1}, {up_0, low_1}, {up_0, up_1}}; - for (const auto& variant_of_input_vector : input_variants) - for (const auto& input_tensor : variant_of_input_vector) - if (!input_tensor) - return false; - if (input_variants.size() == 1) return node->evaluate(upper_output_values, *input_variants.begin()) && node->evaluate(lower_output_values, *input_variants.begin()); diff --git a/src/core/tests/bound_evaluate.cpp b/src/core/tests/bound_evaluate.cpp index cb855ddfa76e16..664f8ebcb810f7 100644 --- a/src/core/tests/bound_evaluate.cpp +++ b/src/core/tests/bound_evaluate.cpp @@ -51,3 +51,31 @@ TEST_F(EvaluateBoundTest, no_exception_when_node_has_output_with_dynamic_element EXPECT_NO_THROW(evaluate_both_bounds(fn_op)); } + +using BoundEvaluatorTest = ::testing::Test; +TEST(BoundEvaluatorTest, no_exception_on_single_bound) { + constexpr auto et = element::i32; + const auto s = Shape{1, 1}; + const auto a = std::make_shared(et, PartialShape{s}); + const auto b = Constant::create(et, s, {1}); + const auto sub = std::make_shared(a, b); + + int32_t a_l[1] = {1}; + a->get_output_tensor(0).set_lower_value(Tensor{et, s, a_l}); + + int32_t o_[1] = {INT32_MIN}; // initial value of output tensor is not needed, it's set to check whether changed + TensorVector output{{et, s, o_}}; + // evaluations won't be performed due to missing upper bound tensor of parameter a + ASSERT_NO_THROW(sub->evaluate_lower(output)); + EXPECT_EQ(o_[0], INT32_MIN); + ASSERT_NO_THROW(sub->evaluate_upper(output)); + EXPECT_EQ(o_[0], INT32_MIN); + + int32_t a_u[1] = {11}; + a->get_output_tensor(0).set_upper_value(Tensor{et, s, a_u}); + // now 
both bounds of sub node can be calculated + ASSERT_NO_THROW(sub->evaluate_lower(output)); + EXPECT_EQ(o_[0], 0); + ASSERT_NO_THROW(sub->evaluate_upper(output)); + EXPECT_EQ(o_[0], 10); +} From 85d9c11b9761bf03486279c9d7aaaf27eb1f6629 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 21 Mar 2023 17:13:20 +0400 Subject: [PATCH 016/296] Fixed build (#16442) --- src/plugins/auto/plugin.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp index 165f5c3db6cf3c..65bd8793f71a6a 100644 --- a/src/plugins/auto/plugin.cpp +++ b/src/plugins/auto/plugin.cpp @@ -895,9 +895,9 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map Date: Tue, 21 Mar 2023 15:05:11 +0100 Subject: [PATCH 017/296] flush by recreating constant (#16430) --- .../test_transformations/test_offline_api.py | 5 +++-- .../flush_fp32_subnormals_to_zero.cpp | 22 +++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/bindings/python/tests/test_transformations/test_offline_api.py b/src/bindings/python/tests/test_transformations/test_offline_api.py index fdc7eec0048ce9..e153fc9412c19a 100644 --- a/src/bindings/python/tests/test_transformations/test_offline_api.py +++ b/src/bindings/python/tests/test_transformations/test_offline_api.py @@ -354,5 +354,6 @@ def test_flush_fp32_subnormals_to_zero(): apply_moc_transformations(model, cf=False, smart_reshape=True) # apply_flush_fp32_subnormals_to_zero is called inside - assert np.all(weights.data[4:8] != subnorm_val) - assert np.all(weights.data[4:8] == 0.0) + new_weights = add_node.input_value(1).get_node() + assert np.all(new_weights.data[4:8] != subnorm_val) + assert np.all(new_weights.data[4:8] == 0.0) diff --git a/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp index f01c60b1c0389b..ca03c288092260 100644 --- a/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp +++ b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp @@ -36,14 +36,28 @@ ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() { bool has_subnormals = false; for (size_t i = 0; i < size; ++i) { if (fpclassify(std::abs(data[i])) == FP_SUBNORMAL) { - data[i] = 0.0f; has_subnormals = true; + break; } } - if (has_subnormals) - return true; + if (!has_subnormals) + return false; + + auto new_constant = std::make_shared(ov::element::f32, node->get_shape()); + auto* dst_data = const_cast(new_constant->get_data_ptr()); + + for (size_t i = 0; i < size; ++i) { + if (fpclassify(std::abs(data[i])) != FP_SUBNORMAL) + dst_data[i] = data[i]; + else + dst_data[i] = 0.0f; + } + + new_constant->set_friendly_name(node->get_friendly_name()); + ov::copy_runtime_info(node, new_constant); + ov::replace_node(node, new_constant); - return false; + return true; }; auto m = make_shared(node_pattern, matcher_name); From 234f36e9b74d6b6d4f4a96b87c2660e95278c56d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Tue, 21 Mar 2023 18:23:29 +0100 Subject: [PATCH 018/296] TopK v11 usage in ONNX FE (#16449) --- .../onnx/frontend/src/op/hardmax.cpp | 22 +++++++------ src/frontends/onnx/frontend/src/op/topk.cpp | 33 +++++++++---------- .../src/utils/arg_min_max_factory.cpp | 20 ++++++----- 3 files changed, 39 insertions(+), 36 deletions(-) diff --git a/src/frontends/onnx/frontend/src/op/hardmax.cpp 
b/src/frontends/onnx/frontend/src/op/hardmax.cpp index eb9b421cd71ed6..5e726e2458b55d 100644 --- a/src/frontends/onnx/frontend/src/op/hardmax.cpp +++ b/src/frontends/onnx/frontend/src/op/hardmax.cpp @@ -4,6 +4,8 @@ #include "op/hardmax.hpp" +#include + #include "exceptions.hpp" #include "ngraph/builder/reshape.hpp" #include "ngraph/op/one_hot.hpp" @@ -37,11 +39,11 @@ OutputVector hardmax(const Node& node) { const auto indices_axis = 1; const auto topk = - std::make_shared(coerced_tensor, - default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), - indices_axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::NONE); + std::make_shared(coerced_tensor, + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), + indices_axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::NONE); const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); @@ -71,11 +73,11 @@ OutputVector hardmax(const Node& node) { row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size); const auto topk = - std::make_shared(input, - default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::NONE); + std::make_shared(input, + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::NONE); const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); diff --git a/src/frontends/onnx/frontend/src/op/topk.cpp b/src/frontends/onnx/frontend/src/op/topk.cpp index df884eaa54c0a9..dc33b103613df2 100644 --- a/src/frontends/onnx/frontend/src/op/topk.cpp +++ b/src/frontends/onnx/frontend/src/op/topk.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "default_opset.hpp" #include "ngraph/node.hpp" @@ -37,13 +38,12 @@ OutputVector topk(const Node& node) { const auto k_node = node.get_attribute_as_constant("k"); const std::int64_t axis{node.get_attribute_value("axis", -1)}; - std::shared_ptr top_k = - std::make_shared(data, - k_node, - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::SORT_VALUES, - element::i64); + std::shared_ptr top_k = std::make_shared(data, + k_node, + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::SORT_VALUES, + element::i64); return {top_k->output(0), top_k->output(1)}; } @@ -55,13 +55,12 @@ OutputVector topk(const Node& node) { auto k = get_k(node); const std::int64_t axis{node.get_attribute_value("axis", -1)}; - std::shared_ptr top_k = - std::make_shared(data, - k, - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::SORT_VALUES, - element::i64); + std::shared_ptr top_k = std::make_shared(data, + k, + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::SORT_VALUES, + element::i64); return {top_k->output(0), top_k->output(1)}; } @@ -79,13 +78,13 @@ OutputVector topk(const Node& node) { const auto sorted = node.get_attribute_value("sorted", 1); // Map attribute values to nGraph enums - const auto sort_type = sorted ? default_opset::TopK::SortType::SORT_VALUES : default_opset::TopK::SortType::NONE; + const auto sort_type = sorted ? 
ov::opset11::TopK::SortType::SORT_VALUES : ov::opset11::TopK::SortType::NONE; const auto compute_max = static_cast(largest); - const auto mode = compute_max ? default_opset::TopK::Mode::MAX : default_opset::TopK::Mode::MIN; + const auto mode = compute_max ? ov::opset11::TopK::Mode::MAX : ov::opset11::TopK::Mode::MIN; std::shared_ptr top_k = - std::make_shared(data, k, axis, mode, sort_type, element::i64); + std::make_shared(data, k, axis, mode, sort_type, element::i64); return {top_k->output(0), top_k->output(1)}; } diff --git a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp index d5a3fdb827061f..9807367273e46f 100644 --- a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp +++ b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp @@ -4,6 +4,8 @@ #include "utils/arg_min_max_factory.hpp" +#include + #include "default_opset.hpp" #include "ngraph/opsets/opset1.hpp" #include "ngraph/validation_util.hpp" @@ -18,14 +20,14 @@ ArgMinMaxFactory::ArgMinMaxFactory(const Node& node) m_select_last_index{node.get_attribute_value("select_last_index", 0)} {} std::shared_ptr ArgMinMaxFactory::make_arg_max() const { - return make_topk_subgraph(default_opset::TopK::Mode::MAX); + return make_topk_subgraph(ov::opset11::TopK::Mode::MAX); } std::shared_ptr ArgMinMaxFactory::make_arg_min() const { - return make_topk_subgraph(default_opset::TopK::Mode::MIN); + return make_topk_subgraph(ov::opset11::TopK::Mode::MIN); } -std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset::TopK::Mode mode) const { +std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(ov::opset11::TopK::Mode mode) const { const auto k_node = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); if (m_select_last_index == 1) { @@ -59,11 +61,11 @@ std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset const auto axis_node = default_opset::Constant::create(ngraph::element::i64, Shape{1}, {normalized_axis}); const auto reverse = std::make_shared(m_input_node, axis_node, opset1::Reverse::Mode::INDEX); - const auto topk = std::make_shared(reverse, - k_node, - normalized_axis, - mode, - default_opset::TopK::SortType::NONE); + const auto topk = std::make_shared(reverse, + k_node, + normalized_axis, + mode, + ov::opset11::TopK::SortType::NONE); const auto data_shape = std::make_shared(m_input_node); const auto dims_on_axis = std::make_shared( @@ -88,7 +90,7 @@ std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset } const auto topk = - std::make_shared(m_input_node, k_node, m_axis, mode, default_opset::TopK::SortType::NONE); + std::make_shared(m_input_node, k_node, m_axis, mode, ov::opset11::TopK::SortType::NONE); const auto result = std::make_shared(topk->output(1), element::i64); From b70e56d11039c9d3fbb774f2bee4c92304f11fe9 Mon Sep 17 00:00:00 2001 From: Tingqian Li Date: Wed, 22 Mar 2023 02:39:25 +0800 Subject: [PATCH 019/296] [CPU] Support using BF16 in INT8 models (#15663) --- src/plugins/intel_cpu/src/config.cpp | 4 -- src/plugins/intel_cpu/src/config.h | 2 - .../intel_cpu/src/dnnl_postops_composer.cpp | 2 +- src/plugins/intel_cpu/src/graph.cpp | 5 --- src/plugins/intel_cpu/src/graph_dumper.cpp | 4 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 27 ------------ src/plugins/intel_cpu/src/nodes/conv.cpp | 5 --- .../intel_cpu/src/nodes/fullyconnected.cpp | 34 +++++++-------- src/plugins/intel_cpu/src/nodes/matmul.cpp | 42 +++++-------------- .../src/utils/debug_capabilities.cpp | 5 --- 
src/plugins/intel_cpu/thirdparty/onednn | 2 +- 11 files changed, 32 insertions(+), 100 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 39a3429c67be53..04c8c6467684f2 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -139,13 +139,11 @@ void Config::readProperties(const std::map &prop) { if (val == PluginConfigParams::YES) { if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) { enforceBF16 = true; - manualEnforceBF16 = true; } else { IE_THROW() << "Platform doesn't support BF16 format"; } } else if (val == PluginConfigParams::NO) { enforceBF16 = false; - manualEnforceBF16 = false; } else { IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16 << ". Expected only YES/NO"; @@ -159,13 +157,11 @@ void Config::readProperties(const std::map &prop) { if (val == "bf16") { if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) { enforceBF16 = true; - manualEnforceBF16 = true; } else { IE_THROW() << "Platform doesn't support BF16 format"; } } else if (val == "f32") { enforceBF16 = false; - manualEnforceBF16 = false; } else { IE_THROW() << "Wrong value for property key " << ov::inference_precision.name() << ". Supported values: bf16, f32"; diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 9f2680fbe88e97..8c399d5189a30f 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -52,12 +52,10 @@ struct Config { #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) LPTransformsMode lpTransformsMode = LPTransformsMode::On; bool enforceBF16 = true; - bool manualEnforceBF16 = false; #else // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode. 
LPTransformsMode lpTransformsMode = LPTransformsMode::Off; bool enforceBF16 = false; - bool manualEnforceBF16 = false; #endif DenormalsOptMode denormalsOptMode = DenormalsOptMode::DO_Keep; diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index 6f1bd97b4085a2..6321ea1cac06aa 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -136,7 +136,7 @@ bool DnnlPostOpsComposer::appendScale(const std::vector& scale, bool isLa if (oscale_values.size() == 1) oscale_mask = 0; else - oscale_mask = 1 << 1; // it works for both Conv/Matmul + oscale_mask = 1 << idxOC; updateOutputScales(); return true; } diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 9764e881de04f5..62f33e02575db7 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1506,11 +1506,6 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPo // Set all non const data paths precision to BF16 void Graph::EnforceBF16() { - // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision - // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default - if (!implication(context->isGraphQuantized(), getConfig().manualEnforceBF16)) - return; - std::function& skipNodes)> searchForNodesToSkip; searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set& skipNodes) -> void { for (size_t i = 0; i < node->getParentEdges().size(); i++) { diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index 1bfe65af51e52c..03b4b138f7a8e5 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -261,7 +261,7 @@ void summary_perf(const Graph &graph) { } const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf; - if (summaryPerf.empty()) + if (summaryPerf.empty() || !std::stoi(summaryPerf)) return; std::map perf_by_type; @@ -308,7 +308,7 @@ void summary_perf(const Graph &graph) { std::stringstream ss; int percentage = static_cast(it.second*100/total_avg); if (percentage == 0) break; - ss << std::setw(10) << std::right << percentage << " % :" << it.first << std::endl; + ss << std::setw(10) << std::right << percentage << " % : " << std::setw(8) << std::right << it.second << "(us) " << it.first << std::endl; std::cout << ss.str(); } } diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 3e6fd7d7e00cf9..949acf7cd6ab1c 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -734,21 +734,6 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { } } -/** - * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support - * for bf16 depthwise postops. - * This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as - * multiple binary post ops. - * This check can already be removed for FC fusing, but should be kept for Convolution, - * which still uses legacy depthwise postops for performance reasons. 
- */ -static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) { - return childNode->getType() == Type::FakeQuantize && - one_of(Precision::BF16, - parentNode->getOriginalOutputPrecisionAtPort(0), - childNode->getOriginalOutputPrecisionAtPort(0)); -} - void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); @@ -772,12 +757,6 @@ void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) { continue; } - // BF16 Quantize Layer Fusing Disabling - if (BF16QuantizeNodeFusing(parentNode, childNode)) { - parent++; - continue; - } - childNode->fuseInto(parentNode); if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { @@ -1066,12 +1045,6 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph &graph) { continue; } - // BF16 Quantize Layer Fusing Disabling - if (BF16QuantizeNodeFusing(parentNode, childNode)) { - parent++; - continue; - } - childNode->fuseInto(parentNode); if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 3ce92de2169f6c..ab07b6521e71f7 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -503,11 +503,6 @@ void Convolution::getSupportedDescriptors() { if (canBeExecutedInInt8()) { DEBUG_LOG(getName(), "Creating I8 descriptor"); - // We have to extend convolution_x8s8s32x from oneDNN to support BF16 output data type - if (outputDataType == memory::data_type::bf16) - outputDataType = memory::data_type::f32; - if (eltwisePrecision == Precision::BF16) - eltwisePrecision = Precision::FP32; // initTryBrgconvFlag depends on outputDataType, should be after outputDataType computed if (!enforceBrgconv) initTryBrgconvFlag(); diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 23b6c5be7cee9f..6b4c8e43521426 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -232,29 +232,29 @@ void FullyConnected::getSupportedDescriptors() { auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(DATA_ID)); outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(DATA_ID)); - if (inputDataType == memory::data_type::f32) { - outputDataType = memory::data_type::f32; - } - if (!fusedWith.empty()) { outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); } auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(WEIGHTS_ID)); - // We have to extend gemm_x8s8s32x_inner_product_fwd_t from oneDNN to support BF16 output data type - if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) - && inputDataType != memory::data_type::bf16) { - inputDataType = outputDataType = memory::data_type::f32; - } - - if (one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) - && outputDataType == memory::data_type::bf16) { + // revert back outputDataType on special cases + if (inputDataType == memory::data_type::f32) { + // oneDNN only support f32 output when input is f32, even if FQ is fused outputDataType = memory::data_type::f32; - } - - if (inputDataType == memory::data_type::bf16 - && 
one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) { - outputDataType = memory::data_type::bf16; + } else if (inputDataType == memory::data_type::bf16) { + // bf16 input only supports bf16/f32 output, even if FQ is fused as post-ops + if (one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) { + outputDataType = memory::data_type::bf16; + } + } else if (one_of(inputDataType, memory::data_type::u8, memory::data_type::s8)) { + if (weightsDataType != memory::data_type::s8) { + // weight has to be s8 for INT8 mode, otherwise fallback to + // f32 mode + inputDataType = outputDataType = memory::data_type::f32; + } + } else { + // s32/u32/... unsupported input data types, fallback to f32 + inputDataType = outputDataType = memory::data_type::f32; } inDims = isDynamicNode() ? makeDummyInputDims() : getInputShapeAtPort(DATA_ID).getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 83e4bd1a179294..c1c1381e6631f2 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -204,34 +204,6 @@ MatMul::MatMul(const std::shared_ptr& op, const GraphContext::CPtr } bool MatMul::canFuse(const NodePtr& node) const { - // per channel binary post op for rank > 2D is supported only by oneDNN reference implementation because of unusual MatMul channel axis (issue 6669) - if (getOutputShapeAtPort(0).getRank() > 2) { - if (const auto* eltwiseNode = dynamic_cast(node.get())) { - if (one_of(eltwiseNode->getAlgorithm(), Algorithm::EltwiseAdd, - Algorithm::EltwiseMultiply, - Algorithm::EltwiseSubtract, - Algorithm::EltwiseDivide, - Algorithm::EltwisePrelu, - Algorithm::EltwiseMulAdd, - Algorithm::EltwisePowerStatic) && - eltwiseNode->getBroadcastingPolicy() != Eltwise::PerTensor) { - return false; - } - } else if (const auto* fakeQuantizeNode = dynamic_cast(node.get())) { - if (fakeQuantizeNode->getBroadcastingPolicy() != FakeQuantize::PerTensor) { - return false; - } - } - } - - // Todo: - // Consider the case when Matmul doesn't support execution in int8, but is getting fused with FQ with int8 output. - // Then the Matmul will change its output precision to fp32, but the FQ child will still has the int8 input precision. - // This information should be propagated! Note that we may need to propagate updated precision to child fused nodes. 
- if (node->getType() == Type::FakeQuantize && - one_of(node->getOriginalOutputPrecisionAtPort(0), Precision::I8, Precision::U8) && - !canBeExecutedInInt8(getOriginalInputPrecisionAtPort(0), getOriginalInputPrecisionAtPort(1))) - return false; return canFuseSimpleOperation(node); } @@ -344,12 +316,20 @@ void MatMul::getSupportedDescriptors() { outPortPrec = firstInPortPrec = secondInPortPrec = Precision::FP32; } + Precision postOpsPrec = outPortPrec; if (!fusedWith.empty()) { - outPortPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); + postOpsPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } - if (!canBeExecutedInInt8(firstInPortPrec, secondInPortPrec) && one_of(outPortPrec, Precision::U8, Precision::I8)) - outPortPrec = Precision::FP32; // INT output is not supported for non-INT inputs + if (canBeExecutedInInt8(firstInPortPrec, secondInPortPrec)) { + // INT8 mode support wide range of output precisions + outPortPrec = postOpsPrec; + } else if (postOpsPrec == Precision::FP32) { + // all non-INT8 modes support fp32 output precision + outPortPrec = postOpsPrec; + } else { + // otherwise we ignore postOpsPrec and stay with getOriginalOutputPrecisionAtPort(0) + } const auto& inputShape0 = getInputShapeAtPort(0); const auto& inputShape1 = getInputShapeAtPort(1); diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index 31b02bd7cad08e..fb13000708cd74 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -479,11 +479,6 @@ std::ostream & operator<<(std::ostream & os, const PrintableModel& model) { os << std::endl; // recursively output subgraphs - if (auto subgraph = std::dynamic_pointer_cast(op)) { - os << "\t\t snippets Subgraph: " << subgraph->get_friendly_name() << " is_quantized:" << subgraph->is_quantized() << std::endl; - os << PrintableModel(subgraph->body(), tag, prefix + "\t\t"); - } - if (auto msubgraph = std::dynamic_pointer_cast(op)) { auto cnt = msubgraph->get_internal_subgraphs_size(); for (int i = 0; i < cnt; i++) { diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index bd3498162fab74..02857209960e9d 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit bd3498162fab7401b571c6ce77d837f1adcff265 +Subproject commit 02857209960e9d91c1b3df90ab4c7ac359bf0973 From d86d94edad0ff07f14574a73ede288c1bfc01857 Mon Sep 17 00:00:00 2001 From: River Li Date: Wed, 22 Mar 2023 05:55:51 +0800 Subject: [PATCH 020/296] [DOC][CAPI] document for remote tensor (#16408) * [DOC][CAPI] document for remote tensor * Update * Update minor * Update GPU_RemoteTensor_API.md --------- Co-authored-by: Sebastian Golebiewski --- .../supported_plugins/GPU_RemoteTensor_API.md | 254 ++++++++++++---- docs/snippets/gpu/context_sharing_va_c.cpp | 156 ++++++++++ .../gpu/preprocessing_nv12_two_planes_c.cpp | 126 ++++++++ .../gpu/remote_objects_creation_c.cpp | 283 ++++++++++++++++++ 4 files changed, 765 insertions(+), 54 deletions(-) create mode 100644 docs/snippets/gpu/context_sharing_va_c.cpp create mode 100644 docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp create mode 100644 docs/snippets/gpu/remote_objects_creation_c.cpp diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md index dc73deb70965d0..0eac844e4c7701 
100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md @@ -45,46 +45,85 @@ To create the ``ov::RemoteContext`` object for user context, explicitly provide of ``ov::RemoteContext`` derived classes. -.. tab:: Linux +.. tab:: Linux/C++ .. tab:: Create from cl_context .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_context + :fragment: [context_from_cl_context] .. tab:: Create from cl_queue .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_queue + :fragment: [context_from_cl_queue] .. tab:: Create from VADisplay .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_va_display + :fragment: [context_from_va_display] -.. tab:: Windows +.. tab:: Windows/C++ .. tab:: Create from cl_context .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_context + :fragment: [context_from_cl_context] .. tab:: Create from cl_queue .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_queue + :fragment: [context_from_cl_queue] .. tab:: Create from ID3D11Device .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_d3d_device + :fragment: [context_from_d3d_device] +.. tab:: Linux/C + + .. tab:: Create from cl_context + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_context] + + .. tab:: Create from cl_queue + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_queue] + + .. tab:: Create from VADisplay + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_va_display] + +.. tab:: Windows/C + + .. tab:: Create from cl_context + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_context] + + .. tab:: Create from cl_queue + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_queue] + + .. tab:: Create from ID3D11Device + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_d3d_device] Getting RemoteContext from the Plugin +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -96,19 +135,33 @@ Once the plugin options have been changed, the internal context is replaced by t To request the current default context of the plugin, use one of the following methods: +.. tab:: C++ -.. tab:: Get context from Core + .. tab:: Get context from Core - .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: cpp - :fragment: default_context_from_core + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_core] -.. tab:: Get context from compiled model + .. tab:: Get context from compiled model - .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: cpp - :fragment: default_context_from_model + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_model] + +.. tab:: C + + .. tab:: Get context from Core + + .. 
doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [default_context_from_core] + .. tab:: Get context from compiled model + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [default_context_from_model] Memory Sharing Between Application and GPU Plugin ########################################################### @@ -116,70 +169,140 @@ Memory Sharing Between Application and GPU Plugin The classes that implement the ``ov::RemoteTensor`` interface are the wrappers for native API memory handles (which can be obtained from them at any time). -To create a shared tensor from a native memory handle, use dedicated ``create_tensor``or ``create_tensor_nv12`` methods +To create a shared tensor from a native memory handle, use dedicated ``create_tensor`` or ``create_tensor_nv12`` methods of the ``ov::RemoteContext`` sub-classes. ``ov::intel_gpu::ocl::ClContext`` has multiple overloads of ``create_tensor`` methods which allow to wrap pre-allocated native handles with the ``ov::RemoteTensor`` -object or request plugin to allocate specific device memory. For more details, see the code snippets below: +object or request plugin to allocate specific device memory. There also provides C APIs to do the same things with C++ APIs. +For more details, see the code snippets below: .. tab-set:: - .. tab-item:: Wrap native handles + .. tab-item:: Wrap native handles/C++ + :sync: wrap-native-handles .. tab-set:: .. tab-item:: USM pointer + :sync: usm-pointer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_usm_pointer + :language: cpp + :fragment: [wrap_usm_pointer] .. tab-item:: cl_mem + :sync: cl_mem .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_mem + :language: cpp + :fragment: [wrap_cl_mem] .. tab-item:: cl::Buffer + :sync: buffer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_buffer + :language: cpp + :fragment: [wrap_cl_buffer] .. tab-item:: cl::Image2D + :sync: image2D .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_image + :language: cpp + :fragment: [wrap_cl_image] .. tab-item:: biplanar NV12 surface + :sync: biplanar .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_nv12_surface + :language: cpp + :fragment: [wrap_nv12_surface] - .. tab-item:: Allocate device memory + .. tab-item:: Allocate device memory/C++ + :sync: allocate-device-memory .. tab-set:: - + .. tab-item:: USM host memory + :sync: usm-host-memory .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_usm_host + :language: cpp + :fragment: [allocate_usm_host] .. tab-item:: USM device memory + :sync: usm-device-memory .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_usm_device + :language: cpp + :fragment: [allocate_usm_device] .. tab-item:: cl::Buffer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_cl_buffer + :language: cpp + :fragment: [allocate_cl_buffer] + +.. tab-set:: + + .. tab-item:: Wrap native handles/C + :sync: wrap-native-handles + + .. tab-set:: + + .. tab-item:: USM pointer + :sync: usm-pointer + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_usm_pointer] + + .. 
tab-item:: cl_mem + :sync: cl_mem + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_mem] + + .. tab-item:: cl::Buffer + :sync: buffer + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_buffer] + + .. tab-item:: cl::Image2D + :sync: image2D + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_image] + + .. tab-item:: biplanar NV12 surface + :sync: biplanar + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [create_nv12_surface] + + .. tab-item:: Allocate device memory/C + :sync: allocate-device-memory + + .. tab-set:: + + .. tab-item:: USM host memory + :sync: usm-host-memory + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [allocate_usm_host] + + .. tab-item:: USM device memory + :sync: usm-device-memory + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [allocate_usm_device] The ``ov::intel_gpu::ocl::D3DContext`` and ``ov::intel_gpu::ocl::VAContext`` classes are derived from ``ov::intel_gpu::ocl::ClContext``. Therefore, they provide the functionality described above and extend it @@ -202,9 +325,17 @@ should be added before model compilation: .. tab:: two-plane - .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp - :language: cpp - :fragment: [init_preproc] + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp + :language: cpp + :fragment: [init_preproc] + + .. tab:: C + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp + :language: c + :fragment: [init_preproc] .. tab:: single-plane @@ -228,21 +359,29 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. tab:: two-plane - .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp - :language: cpp - :fragment: single_batch + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp + :language: cpp + :fragment: [single_batch] + + .. tab:: C + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp + :language: c + :fragment: [single_batch] .. tab:: single-plane .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp - :fragment: single_batch + :fragment: [single_batch] .. tab:: NV12 to Grey .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp - :fragment: single_batch + :fragment: [single_batch] .. tab:: Multiple Batches @@ -250,19 +389,19 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] .. tab:: single-plane .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] .. tab:: NV12 to Grey .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] I420 color format can be processed in a similar way @@ -283,7 +422,7 @@ on waiting for the completion of inference. The pseudo-code may look as follows: .. 
doxygensnippet:: docs/snippets/gpu/queue_sharing.cpp :language: cpp - :fragment: queue_sharing + :fragment: [queue_sharing] Limitations @@ -326,20 +465,27 @@ To see pseudo-code of usage examples, refer to the sections below. .. doxygensnippet:: docs/snippets/gpu/context_sharing.cpp :language: cpp - :fragment: context_sharing_get_from_ov + :fragment: [context_sharing_get_from_ov] .. dropdown:: Running GPU Plugin Inference within User-Supplied Shared Context .. doxygensnippet:: docs/snippets/gpu/context_sharing.cpp :language: cpp - :fragment: context_sharing_user_handle + :fragment: [context_sharing_user_handle] .. dropdown:: Direct Consuming of the NV12 VAAPI Video Decoder Surface on Linux - .. doxygensnippet:: docs/snippets/gpu/context_sharing_va.cpp - :language: cpp - :fragment: context_sharing_va + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/context_sharing_va.cpp + :language: cpp + :fragment: [context_sharing_va] + + .. tab:: C + .. doxygensnippet:: docs/snippets/gpu/context_sharing_va_c.cpp + :language: c + :fragment: [context_sharing_va] See Also ####################################### diff --git a/docs/snippets/gpu/context_sharing_va_c.cpp b/docs/snippets/gpu/context_sharing_va_c.cpp new file mode 100644 index 00000000000000..dd22e330bb5dcf --- /dev/null +++ b/docs/snippets/gpu/context_sharing_va_c.cpp @@ -0,0 +1,156 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef ENABLE_LIBVA +#include +#include +#include + +VADisplay get_va_display(); +VASurfaceID decode_va_surface(); + +int main() { + ov_core_t* core = NULL; + ov_model_t* model = NULL; + ov_compiled_model_t* compiled_model = NULL; + ov_infer_request_t* infer_request = NULL; + ov_remote_context_t* shared_va_context = NULL; + ov_tensor_t* remote_tensor = NULL; + ov_preprocess_prepostprocessor_t* preprocess = NULL; + ov_preprocess_input_info_t* preprocess_input_info = NULL; + ov_preprocess_input_tensor_info_t* preprocess_input_tensor_info = NULL; + ov_preprocess_preprocess_steps_t* preprocess_input_steps = NULL; + ov_preprocess_input_model_info_t* preprocess_input_model_info = NULL; + ov_layout_t* layout = NULL; + ov_model_t* new_model = NULL; + + ov_output_const_port_t* input_port = NULL; + char* in_tensor_name = NULL; + char* out_tensor_name = NULL; + ov_shape_t* input_shape = NULL; + ov_element_type_e input_type; + + const int height = 480; + const int width = 640; + + // initialize the objects + ov_core_create(&core); + ov_core_read_model(core, "model.xml", "model.bin", &model); + + // ... + + //! [context_sharing_va] + + // ... 
+ + ov_preprocess_prepostprocessor_create(model, &preprocess); + ov_preprocess_prepostprocessor_get_input_info(preprocess, &preprocess_input_info); + ov_preprocess_input_info_get_tensor_info(preprocess_input_info, &preprocess_input_tensor_info); + ov_preprocess_input_tensor_info_set_element_type(preprocess_input_tensor_info, U8); + ov_preprocess_input_tensor_info_set_color_format_with_subname(preprocess_input_tensor_info, + NV12_TWO_PLANES, + 2, + "y", + "uv"); + ov_preprocess_input_tensor_info_set_memory_type(preprocess_input_tensor_info, "GPU_SURFACE"); + ov_preprocess_input_tensor_info_set_spatial_static_shape(preprocess_input_tensor_info, height, width); + ov_preprocess_input_info_get_preprocess_steps(preprocess_input_info, &preprocess_input_steps); + ov_preprocess_preprocess_steps_convert_color(preprocess_input_steps, BGR); + ov_preprocess_preprocess_steps_resize(preprocess_input_steps, RESIZE_LINEAR); + ov_preprocess_input_info_get_model_info(preprocess_input_info, &preprocess_input_model_info); + ov_layout_create("NCHW", &layout); + ov_preprocess_input_model_info_set_layout(preprocess_input_model_info, layout); + ov_preprocess_prepostprocessor_build(preprocess, &new_model); + + VADisplay display = get_va_display(); + // create the shared context object + ov_core_create_context(core, + "GPU", + 4, + &shared_va_context, + ov_property_key_intel_gpu_context_type, + "VA_SHARED", + ov_property_key_intel_gpu_va_device, + display); + + // compile model within a shared context + ov_core_compile_model_with_context(core, new_model, shared_va_context, 0, &compiled_model); + + ov_output_const_port_t* port_0 = NULL; + char* input_name_0 = NULL; + ov_model_const_input_by_index(new_model, 0, &port_0); + ov_port_get_any_name(port_0, &input_name_0); + + ov_output_const_port_t* port_1 = NULL; + char* input_name_1 = NULL; + ov_model_const_input_by_index(new_model, 1, &port_1); + ov_port_get_any_name(port_1, &input_name_1); + + ov_shape_t shape_y = {0, NULL}; + ov_shape_t shape_uv = {0, NULL}; + ov_const_port_get_shape(port_0, &shape_y); + ov_const_port_get_shape(port_1, &shape_uv); + + // execute decoding and obtain decoded surface handle + VASurfaceID va_surface = decode_va_surface(); + // ... + //wrap decoder output into RemoteBlobs and set it as inference input + + ov_tensor_t* remote_tensor_y = NULL; + ov_tensor_t* remote_tensor_uv = NULL; + ov_remote_context_create_tensor(shared_va_context, + U8, + shape_y, + 6, + &remote_tensor_y, + ov_property_key_intel_gpu_shared_mem_type, + "VA_SURFACE", + ov_property_key_intel_gpu_dev_object_handle, + va_surface, + ov_property_key_intel_gpu_va_plane, + 0); + ov_remote_context_create_tensor(shared_va_context, + U8, + shape_uv, + 6, + &remote_tensor_uv, + ov_property_key_intel_gpu_shared_mem_type, + "VA_SURFACE", + ov_property_key_intel_gpu_dev_object_handle, + va_surface, + ov_property_key_intel_gpu_va_plane, + 1); + + ov_compiled_model_create_infer_request(compiled_model, &infer_request); + ov_infer_request_set_tensor(infer_request, input_name_0, remote_tensor_y); + ov_infer_request_set_tensor(infer_request, input_name_1, remote_tensor_uv); + ov_infer_request_infer(infer_request); + //! 
[context_sharing_va] + + // deinitialization + ov_free(input_name_0); + ov_free(input_name_1); + ov_output_const_port_free(port_0); + ov_output_const_port_free(port_1); + ov_layout_free(layout); + ov_preprocess_input_model_info_free(preprocess_input_model_info); + ov_preprocess_preprocess_steps_free(preprocess_input_steps); + ov_preprocess_input_tensor_info_free(preprocess_input_tensor_info); + ov_preprocess_input_info_free(preprocess_input_info); + ov_model_free(new_model); + ov_preprocess_prepostprocessor_free(preprocess); + ov_tensor_free(remote_tensor_y); + ov_tensor_free(remote_tensor_uv); + ov_shape_free(&shape_y); + ov_shape_free(&shape_uv); + ov_infer_request_free(infer_request); + ov_compiled_model_free(compiled_model); + ov_model_free(model); + ov_model_free(new_model); + ov_remote_context_free(shared_va_context); + ov_core_free(core); + + return 0; +} +#endif // ENABLE_LIBVA diff --git a/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp b/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp new file mode 100644 index 00000000000000..826af8ddffe32b --- /dev/null +++ b/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp @@ -0,0 +1,126 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +cl::Image2D get_y_image(); +cl::Image2D get_uv_image(); + +int main() { + ov_core_t* core = NULL; + ov_model_t* model = NULL; + ov_compiled_model_t* compiled_model = NULL; + ov_infer_request_t* infer_request = NULL; + ov_preprocess_prepostprocessor_t* preprocess = NULL; + ov_preprocess_input_info_t* preprocess_input_info = NULL; + ov_preprocess_input_tensor_info_t* preprocess_input_tensor_info = NULL; + ov_preprocess_preprocess_steps_t* preprocess_input_steps = NULL; + ov_preprocess_input_model_info_t* preprocess_input_model_info = NULL; + ov_layout_t* layout = NULL; + ov_model_t* model_with_preproc = NULL; + ov_remote_context_t* gpu_context = NULL; + char* input_name0 = NULL; + char* input_name1 = NULL; + ov_output_const_port_t* input_port0 = NULL; + ov_output_const_port_t* input_port1 = NULL; + size_t height = 480; + size_t width = 640; + + ov_core_create(&core); + ov_core_read_model(core, "model.xml", "model.bin", &model); + + //! [init_preproc] + ov_preprocess_prepostprocessor_create(model, &preprocess); + ov_preprocess_prepostprocessor_get_input_info(preprocess, &preprocess_input_info); + ov_preprocess_input_info_get_tensor_info(preprocess_input_info, &preprocess_input_tensor_info); + ov_preprocess_input_tensor_info_set_element_type(preprocess_input_tensor_info, ov_element_type_e::U8); + ov_preprocess_input_tensor_info_set_color_format_with_subname(preprocess_input_tensor_info, + ov_color_format_e::NV12_TWO_PLANES, + 2, + "y", + "uv"); + ov_preprocess_input_tensor_info_set_memory_type(preprocess_input_tensor_info, "GPU_SURFACE"); + ov_preprocess_input_tensor_info_set_spatial_static_shape(preprocess_input_tensor_info, height, width); + ov_preprocess_input_info_get_preprocess_steps(preprocess_input_info, &preprocess_input_steps); + ov_preprocess_preprocess_steps_convert_color(preprocess_input_steps, ov_color_format_e::BGR); + ov_preprocess_preprocess_steps_resize(preprocess_input_steps, RESIZE_LINEAR); + ov_preprocess_input_info_get_model_info(preprocess_input_info, &preprocess_input_model_info); + ov_layout_create("NCHW", &layout); + ov_preprocess_input_model_info_set_layout(preprocess_input_model_info, layout); + ov_preprocess_prepostprocessor_build(preprocess, &model_with_preproc); + //! 
+
+    ov_core_compile_model(core, model_with_preproc, "GPU", 0, &compiled_model);
+    ov_compiled_model_get_context(compiled_model, &gpu_context);
+    ov_compiled_model_create_infer_request(compiled_model, &infer_request);
+
+    {
+        //! [single_batch]
+        ov_model_const_input_by_index(model, 0, &input_port0);
+        ov_model_const_input_by_index(model, 1, &input_port1);
+        ov_port_get_any_name(input_port0, &input_name0);
+        ov_port_get_any_name(input_port1, &input_name1);
+
+        ov_shape_t shape_y, shape_uv;
+        ov_tensor_t* remote_tensor_y = NULL;
+        ov_tensor_t* remote_tensor_uv = NULL;
+        ov_const_port_get_shape(input_port0, &shape_y);
+        ov_const_port_get_shape(input_port1, &shape_uv);
+
+        cl::Image2D image_y = get_y_image();
+        cl::Image2D image_uv = get_uv_image();
+        ov_remote_context_create_tensor(gpu_context,
+                                        ov_element_type_e::U8,
+                                        shape_y,
+                                        4,
+                                        &remote_tensor_y,
+                                        ov_property_key_intel_gpu_shared_mem_type,
+                                        "OCL_IMAGE2D",
+                                        ov_property_key_intel_gpu_mem_handle,
+                                        image_y.get());
+
+        ov_remote_context_create_tensor(gpu_context,
+                                        ov_element_type_e::U8,
+                                        shape_uv,
+                                        4,
+                                        &remote_tensor_uv,
+                                        ov_property_key_intel_gpu_shared_mem_type,
+                                        "OCL_IMAGE2D",
+                                        ov_property_key_intel_gpu_mem_handle,
+                                        image_uv.get());
+
+        ov_infer_request_set_tensor(infer_request, input_name0, remote_tensor_y);
+        ov_infer_request_set_tensor(infer_request, input_name1, remote_tensor_uv);
+        ov_infer_request_infer(infer_request);
+        //! [single_batch]
+
+        ov_free(input_name0);
+        ov_free(input_name1);
+        ov_output_const_port_free(input_port0);
+        ov_output_const_port_free(input_port1);
+
+        ov_layout_free(layout);
+        ov_preprocess_input_model_info_free(preprocess_input_model_info);
+        ov_preprocess_preprocess_steps_free(preprocess_input_steps);
+        ov_preprocess_input_tensor_info_free(preprocess_input_tensor_info);
+        ov_preprocess_input_info_free(preprocess_input_info);
+        ov_preprocess_prepostprocessor_free(preprocess);
+
+        ov_tensor_free(remote_tensor_y);
+        ov_tensor_free(remote_tensor_uv);
+        ov_shape_free(&shape_y);
+        ov_shape_free(&shape_uv);
+
+        ov_infer_request_free(infer_request);
+        ov_compiled_model_free(compiled_model);
+        ov_model_free(model);
+        ov_model_free(model_with_preproc);
+        ov_remote_context_free(gpu_context);
+        ov_core_free(core);
+    }
+
+    return 0;
+}
diff --git a/docs/snippets/gpu/remote_objects_creation_c.cpp b/docs/snippets/gpu/remote_objects_creation_c.cpp
new file mode 100644
index 00000000000000..c870da6bb06a26
--- /dev/null
+++ b/docs/snippets/gpu/remote_objects_creation_c.cpp
@@ -0,0 +1,283 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+#include
+
+#ifdef WIN32
+typedef void* ID3D11Device;
+#elif defined(ENABLE_LIBVA)
+#include
+#endif
+
+void* allocate_usm_buffer(size_t size);
+cl_mem allocate_cl_mem(size_t size);
+cl_context get_cl_context();
+cl_command_queue get_cl_queue();
+cl::Buffer allocate_buffer(size_t size);
+cl::Image2D allocate_image(size_t size);
+
+#ifdef WIN32
+ID3D11Device* get_d3d_device();
+#elif defined(ENABLE_LIBVA)
+VADisplay get_va_display();
+#endif
+
+int main() {
+    ov_core_t* core = NULL;
+    ov_model_t* model = NULL;
+    ov_compiled_model_t* compiled_model = NULL;
+    ov_remote_context_t* gpu_context = NULL;
+    ov_tensor_t* remote_tensor = NULL;
+
+    ov_output_const_port* input_port = NULL;
+    char* in_tensor_name = NULL;
+    char* out_tensor_name = NULL;
+    ov_shape_t input_shape;
+    ov_element_type_e input_type;
+
+    ov_core_create(&core);
+    ov_core_read_model(core, "model.xml", "model.bin", &model);
+
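+    // Query the model's single input: its tensor name, shape and element type are
+    // reused below, and input_size (the product of the static shape's dimensions)
+    // is what the shared USM/OpenCL allocations in the following examples are
+    // sized from.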
ov_model_const_input(model, &input_port); + ov_port_get_any_name(input_port, &in_tensor_name); + ov_const_port_get_shape(input_port, &input_shape); + ov_port_get_element_type(input_port, &input_type); + size_t input_size = 1; + for (auto i = 0; i < input_shape.rank; i++) + input_size *= input_shape.dims[i]; + + ov_core_compile_model(core, model, "GPU", 0, &compiled_model); + ov_compiled_model_get_context(compiled_model, &gpu_context); + +{ + //! [wrap_usm_pointer] + void* shared_buffer = allocate_usm_buffer(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_USER_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer); + //! [wrap_usm_pointer] +} + +{ + //! [wrap_cl_mem] + cl_mem shared_buffer = allocate_cl_mem(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer); + //! [wrap_cl_mem] +} + +{ + //! [wrap_cl_buffer] + cl::Buffer shared_buffer = allocate_buffer(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer.get()); + //! [wrap_cl_buffer] +} + +{ + //! [wrap_cl_image] + cl::Image2D shared_buffer = allocate_image(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_IMAGE2D", + ov_property_key_intel_gpu_mem_handle, + shared_buffer.get()); + //! [wrap_cl_image] +} + +{ + //! [allocate_usm_device] + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 2, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_USER_BUFFER"); + // Extract raw usm pointer from remote tensor + void* usm_ptr = NULL; + ov_tensor_data(remote_tensor, &usm_ptr); + //! [allocate_usm_device] +} + +{ + //! [allocate_usm_host] + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 2, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_HOST_BUFFER"); + // Extract raw usm pointer from remote tensor + void* usm_ptr = NULL; + ov_tensor_data(remote_tensor, &usm_ptr); + //! [allocate_usm_host] +} + +{ + int64_t width = 1024; + int64_t height = 768; + + int64_t y_plane_size = width * height; + int64_t uv_plane_size = width * height / 2; + + ov_shape_t shape_y = {0, NULL}; + int64_t dims_y[4] = {1, 1, height, width}; + ov_shape_t shape_uv = {0, NULL}; + int64_t dims_uv[4] = {1, 2, height / 2, width / 2}; + ov_tensor_t* remote_tensor_y = NULL; + ov_tensor_t* remote_tensor_uv = NULL; + + ov_shape_create(4, dims_y, &shape_y); + ov_shape_create(4, dims_uv, &shape_uv); + + //! 
[create_nv12_surface]
+    cl::Image2D y_plane_surface = allocate_image(y_plane_size);
+    cl::Image2D uv_plane_surface = allocate_image(uv_plane_size);
+
+    ov_remote_context_create_tensor(gpu_context,
+                                    input_type,
+                                    shape_y,
+                                    4,
+                                    &remote_tensor_y,
+                                    ov_property_key_intel_gpu_shared_mem_type,
+                                    "OCL_IMAGE2D",
+                                    ov_property_key_intel_gpu_mem_handle,
+                                    y_plane_surface.get());
+
+    ov_remote_context_create_tensor(gpu_context,
+                                    input_type,
+                                    shape_uv,
+                                    4,
+                                    &remote_tensor_uv,
+                                    ov_property_key_intel_gpu_shared_mem_type,
+                                    "OCL_IMAGE2D",
+                                    ov_property_key_intel_gpu_mem_handle,
+                                    uv_plane_surface.get());
+
+    ov_tensor_free(remote_tensor_y);
+    ov_tensor_free(remote_tensor_uv);
+    ov_shape_free(&shape_y);
+    ov_shape_free(&shape_uv);
+    //! [create_nv12_surface]
+}
+
+{
+    //! [context_from_cl_context]
+    cl_context cl_context = get_cl_context();
+    ov_core_create_context(core,
+                           "GPU",
+                           4,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "OCL",
+                           ov_property_key_intel_gpu_ocl_context,
+                           cl_context);
+    //! [context_from_cl_context]
+}
+
+{
+    //! [context_from_cl_queue]
+    cl_command_queue cl_queue = get_cl_queue();
+    cl_context cl_context = get_cl_context();
+    ov_core_create_context(core,
+                           "GPU",
+                           6,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "OCL",
+                           ov_property_key_intel_gpu_ocl_context,
+                           cl_context,
+                           ov_property_key_intel_gpu_ocl_queue,
+                           cl_queue);
+    //! [context_from_cl_queue]
+}
+
+#ifdef WIN32
+{
+    //! [context_from_d3d_device]
+    ID3D11Device* device = get_d3d_device();
+    ov_core_create_context(core,
+                           "GPU",
+                           4,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "VA_SHARED",
+                           ov_property_key_intel_gpu_va_device,
+                           device);
+    //! [context_from_d3d_device]
+}
+#elif defined(ENABLE_LIBVA)
+{
+    //! [context_from_va_display]
+    VADisplay display = get_va_display();
+    ov_core_create_context(core,
+                           "GPU",
+                           4,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "VA_SHARED",
+                           ov_property_key_intel_gpu_va_device,
+                           display);
+    //! [context_from_va_display]
+}
+#endif
+{
+    //! [default_context_from_core]
+    ov_core_get_default_context(core, "GPU", &gpu_context);
+    // Extract ocl context handle from RemoteContext
+    size_t size = 0;
+    char* params = nullptr;
+    // params is formatted like: "CONTEXT_TYPE OCL OCL_CONTEXT 0x5583b2ec7b40 OCL_QUEUE 0x5583b2e98ff0"
+    // You need to parse it.
+    ov_remote_context_get_params(gpu_context, &size, &params);
+    //! [default_context_from_core]
+}
+
+{
+    //! [default_context_from_model]
+    ov_compiled_model_get_context(compiled_model, &gpu_context);
+    // Extract ocl context handle from RemoteContext
+    size_t size = 0;
+    char* params = nullptr;
+    // params is formatted like: "CONTEXT_TYPE OCL OCL_CONTEXT 0x5583b2ec7b40 OCL_QUEUE 0x5583b2e98ff0"
+    // You need to parse it.
+    ov_remote_context_get_params(gpu_context, &size, &params);
+    //! [default_context_from_model]
+}
+
+ov_compiled_model_free(compiled_model);
+ov_model_free(model);
+ov_remote_context_free(gpu_context);
+ov_core_free(core);
+
+return 0;
+}

From 5e98696464489a47e02d05607757c11896f354fa Mon Sep 17 00:00:00 2001
From: Haiqi Pan
Date: Tue, 21 Mar 2023 20:29:35 -0700
Subject: [PATCH 021/296] Fix Windows build warnings in template and core tests (#15967)

* fix C4305
* 1.0f
* Element
* fix c4244
* fix truncation from double to float in grn.cpp
* Revert "fix truncation from double to float in grn.cpp"

This reverts commit 5263b37cb2d4114971db4192305c82ff063edea0.
* fix grn.cpp * add 4305 * fix low * add TearDown * revert softmax.cpp * pragram * fix conflicts * fix conflicts * size_t -> ov::label_t * WIN32 --------- Co-authored-by: Ilya Lavrenov --- src/core/tests/CMakeLists.txt | 5 ----- src/core/tests/type_prop/matmul.cpp | 2 +- src/plugins/intel_gpu/src/graph/program_node.cpp | 2 +- .../template/tests/functional/CMakeLists.txt | 4 ---- .../tests/functional/op_reference/grn.cpp | 16 ++++++++-------- .../tests/functional/op_reference/softmax.cpp | 4 ++++ .../include/behavior/plugin/caching_tests.hpp | 3 +++ 7 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/core/tests/CMakeLists.txt b/src/core/tests/CMakeLists.txt index e1dd3e60b7c1d1..6cd2bc51af1259 100644 --- a/src/core/tests/CMakeLists.txt +++ b/src/core/tests/CMakeLists.txt @@ -2,11 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # 'argument': conversion from 'size_t' to 'int', possible loss of data - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4244) -endif() set(TARGET_NAME ov_core_unit_tests) diff --git a/src/core/tests/type_prop/matmul.cpp b/src/core/tests/type_prop/matmul.cpp index f7b6116a259adf..327e8574c53301 100644 --- a/src/core/tests/type_prop/matmul.cpp +++ b/src/core/tests/type_prop/matmul.cpp @@ -528,7 +528,7 @@ TEST(type_prop, matmul_propagate_labels_on_interval_dims) { } TEST(type_prop, matmul_propagate_label_on_b_input_after_reshape) { - constexpr size_t my_label = 2; + constexpr ov::label_t my_label = 2; auto marked_dim = Dimension(2, 3); ov::DimensionTracker::set_label(marked_dim, my_label); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 6055cd23407f01..5b66ad11a25149 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1215,4 +1215,4 @@ void program_node::init_onednn_primitive_attributes() { } -#endif // ENABLE_ONEDNN_FOR_GPU +#endif // ENABLE_ONEDNN_FOR_GPU \ No newline at end of file diff --git a/src/plugins/template/tests/functional/CMakeLists.txt b/src/plugins/template/tests/functional/CMakeLists.txt index 6239c0e3fb599f..b45364d93c606f 100644 --- a/src/plugins/template/tests/functional/CMakeLists.txt +++ b/src/plugins/template/tests/functional/CMakeLists.txt @@ -6,11 +6,7 @@ set(TARGET_NAME ov_template_func_tests) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4244) - ie_add_compiler_flags(/wd4250) ie_add_compiler_flags(/wd4305) - ie_add_compiler_flags(/wd4756) - ie_add_compiler_flags(/wd4018) endif() ov_add_test_target( diff --git a/src/plugins/template/tests/functional/op_reference/grn.cpp b/src/plugins/template/tests/functional/op_reference/grn.cpp index 4c2b50e5d20b0d..4322da7cd3c921 100644 --- a/src/plugins/template/tests/functional/op_reference/grn.cpp +++ b/src/plugins/template/tests/functional/op_reference/grn.cpp @@ -59,17 +59,17 @@ std::vector generateGrnParams(const element::Type& type) { using T = typename element_type_traits::value_type; std::vector grnParams { // bias 1e-6 // 2D // 3D // 4D - GrnParams(1e-6, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + GrnParams(1e-6f, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, std::vector {0.182574, 0.365148, 0.547723, 0.730297, 0.379049, 0.454859, 0.530669, 0.606478, 0.426162, 0.473514, 0.520865, 0.568217}), - GrnParams(1e-6, PartialShape {2, 3, 4}, type, + GrnParams(1e-6f, PartialShape {2, 3, 4}, type, std::vector 
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0966737, 0.169031, 0.224231, 0.267261, 0.483368, 0.507093, 0.523205, 0.534522, 0.870063, 0.845154, 0.822179, 0.801784, 0.433574, 0.441836, 0.449215, 0.455842, 0.566982, 0.568075, 0.569005, 0.569803, 0.700389, 0.694314, 0.688796, 0.683763}), - GrnParams(1e-6, PartialShape {1, 2, 3, 4}, type, + GrnParams(1e-6f, PartialShape {1, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, 0.997055, 0.989949, 0.980581, 0.970143, 0.959365, 0.948683, 0.938343, 0.928477, 0.919145, 0.910366, 0.902134, 0.894427}), - GrnParams(1e-6, PartialShape {2, 2, 3, 4}, type, + GrnParams(1e-6f, PartialShape {2, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, @@ -77,17 +77,17 @@ std::vector generateGrnParams(const element::Type& type) { 0.559857, 0.564684, 0.56921, 0.573462, 0.577465, 0.581238, 0.584802, 0.588172, 0.591364, 0.594391, 0.597266, 0.6, 0.828589, 0.825307, 0.822192, 0.819232, 0.816416, 0.813733, 0.811176, 0.808736, 0.806405, 0.804176, 0.802043, 0.8}), // bias 100.25 // 2D // 3D // 4D - GrnParams(100.25, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + GrnParams(100.25f, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, std::vector {0.0876216, 0.175243, 0.262865, 0.350486, 0.301923, 0.362308, 0.422693, 0.483077, 0.385076, 0.427863, 0.470649, 0.513435}), - GrnParams(100.25, PartialShape {2, 3, 4}, type, + GrnParams(100.25f, PartialShape {2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0694629, 0.129032, 0.179525, 0.222137, 0.347314, 0.387097, 0.418891, 0.444273, 0.625166, 0.645161, 0.658258, 0.66641, 0.41125, 0.421303, 0.430287, 0.438356, 0.537789, 0.541675, 0.54503, 0.547945, 0.664327, 0.662047, 0.659774, 0.657534}), - GrnParams(100.25, PartialShape {1, 2, 3, 4}, type, + GrnParams(100.25f, PartialShape {1, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994, 0.790789, 0.807954, 0.820457, 0.829283, 0.835252, 0.839026, 0.841128, 0.841965, 0.841854, 0.841037, 0.839701, 0.837989f}), - GrnParams(100.25, PartialShape {2, 2, 3, 4}, type, + GrnParams(100.25f, PartialShape {2, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994, diff --git a/src/plugins/template/tests/functional/op_reference/softmax.cpp b/src/plugins/template/tests/functional/op_reference/softmax.cpp index e0e1a3b1e2c0e3..293df1a3cd5887 100644 --- a/src/plugins/template/tests/functional/op_reference/softmax.cpp +++ 
b/src/plugins/template/tests/functional/op_reference/softmax.cpp @@ -10,6 +10,10 @@ using namespace reference_tests; using namespace ov; +#ifdef _WIN32 +# pragma warning(disable : 4756) +#endif + namespace { struct SoftmaxParams { template diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp index 0b4a22f03f2ab2..89b86ae7fe3dec 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp @@ -94,6 +94,9 @@ class LoadNetworkCompiledKernelsCacheTest : virtual public LayerTestsUtils::Laye std::replace(test_name.begin(), test_name.end(), '\\', '_'); cache_path = "LoadNetwork" + test_name + "_cache"; } + void TearDown() override { + APIBaseTest::TearDown(); + } }; DISABLE_WARNING_MSVC_END(4250) From 95636f7715097f2f0c5718491a8ac28c8248f0b0 Mon Sep 17 00:00:00 2001 From: Xuejun Zhai Date: Wed, 22 Mar 2023 11:35:24 +0800 Subject: [PATCH 022/296] [Unicode API] Add wide char for compiler model APIs (#16180) * [Unicode API] Add wide char for compiler model APIs Signed-off-by: Zhai, Xuejun * Avoid duplicated func description Signed-off-by: Zhai, Xuejun * Fix format issue Signed-off-by: Zhai, Xuejun * Add unite test for wstring of complie model Signed-off-by: Zhai, Xuejun * Clear code Signed-off-by: Zhai, Xuejun * Add unite test for other compile model unicode APIs Signed-off-by: Zhai, Xuejun * Clear log output Signed-off-by: Zhai, Xuejun * Add parameter of device for compiled model unicode test Signed-off-by: Zhai, Xuejun --------- Signed-off-by: Zhai, Xuejun --- .../include/openvino/runtime/core.hpp | 39 +++- src/inference/src/core.cpp | 14 ++ .../behavior/ov_plugin/core_integration.hpp | 180 ++++++++++++++++++ 3 files changed, 231 insertions(+), 2 deletions(-) diff --git a/src/inference/include/openvino/runtime/core.hpp b/src/inference/include/openvino/runtime/core.hpp index d761d8a2c52c33..3de1aad5ac0689 100644 --- a/src/inference/include/openvino/runtime/core.hpp +++ b/src/inference/include/openvino/runtime/core.hpp @@ -196,22 +196,29 @@ class OPENVINO_RUNTIME_API Core { * operation. * * @return A compiled model. + * @{ */ CompiledModel compile_model(const std::string& model_path, const AnyMap& properties = {}); +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + CompiledModel compile_model(const std::wstring& model_path, const AnyMap& properties = {}); +#endif + /// @} + /** - * @brief Reads and loads a compiled model from IR / ONNX / PDPD file to the default OpenVINI device selected by + * @brief Reads and loads a compiled model from IR / ONNX / PDPD file to the default OpenVINO device selected by * AUTO plugin. 
* * This can be more efficient than using read_model + compile_model(Model) flow * especially for cases when caching is enabled and cached model is available * * @tparam Properties Should be the pack of `std::pair` types - * @param model_path path to model + * @param model_path path to model with string or wstring * @param properties Optional pack of pairs: (property name, property value) relevant only for this * load operation * * @return A compiled model + * @{ */ template util::EnableIfAllStringAny compile_model(const std::string& model_path, @@ -219,6 +226,15 @@ class OPENVINO_RUNTIME_API Core { return compile_model(model_path, AnyMap{std::forward(properties)...}); } +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + template + util::EnableIfAllStringAny compile_model(const std::wstring& model_path, + Properties&&... properties) { + return compile_model(model_path, AnyMap{std::forward(properties)...}); + } +#endif + /// @} + /** * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD file. * @@ -231,11 +247,19 @@ class OPENVINO_RUNTIME_API Core { * operation. * * @return A compiled model. + * @{ */ CompiledModel compile_model(const std::string& model_path, const std::string& device_name, const AnyMap& properties = {}); +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + CompiledModel compile_model(const std::wstring& model_path, + const std::string& device_name, + const AnyMap& properties = {}); +#endif + /// @} + /** * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD file. * @@ -249,6 +273,7 @@ class OPENVINO_RUNTIME_API Core { * load operation. * * @return A compiled model. + * @{ */ template util::EnableIfAllStringAny compile_model(const std::string& model_path, @@ -257,6 +282,16 @@ class OPENVINO_RUNTIME_API Core { return compile_model(model_path, device_name, AnyMap{std::forward(properties)...}); } +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + template + util::EnableIfAllStringAny compile_model(const std::wstring& model_path, + const std::string& device_name, + Properties&&... properties) { + return compile_model(model_path, device_name, AnyMap{std::forward(properties)...}); + } +#endif + /// @} + /** * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD memory. * @param model String with a model in IR/ONNX/PDPD format. 
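Taken together, these declarations give every path-based compile_model overload a std::wstring twin. A minimal usage sketch of the new API surface (the model path, the "CPU" device name and the variable names below are illustrative, and the wide-string variants exist only when OPENVINO_ENABLE_UNICODE_PATH_SUPPORT is defined):

    #include <string>
    #include <openvino/runtime/core.hpp>

    int main() {
        ov::Core core;
    #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
        // Illustrative path; the point is that it may contain non-ASCII characters.
        const std::wstring model_path = L"model.xml";
        // Default device selection, no properties.
        ov::CompiledModel m0 = core.compile_model(model_path);
        // Explicit device plus a property pack (dispatches to the variadic overload).
        ov::CompiledModel m1 =
            core.compile_model(model_path, "CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
    #endif
        return 0;
    }

Keeping the overload set symmetric means callers never convert paths by hand; the conversion happens once inside Core, as the core.cpp hunk below shows.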
diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp
index 9da46ee74fae3e..0a2fba9072b6ff 100644
--- a/src/inference/src/core.cpp
+++ b/src/inference/src/core.cpp
@@ -131,6 +131,12 @@ CompiledModel Core::compile_model(const std::string& model_path, const AnyMap& c
     return compile_model(model_path, ov::DEFAULT_DEVICE_NAME, config);
 }
 
+#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+CompiledModel Core::compile_model(const std::wstring& model_path, const AnyMap& config) {
+    return compile_model(ov::util::wstring_to_string(model_path), config);
+}
+#endif
+
 CompiledModel Core::compile_model(const std::string& model_path, const std::string& device_name, const AnyMap& config) {
     OV_CORE_CALL_STATEMENT({
         auto exec = _impl->compile_model(model_path, device_name, config);
@@ -138,6 +144,14 @@ CompiledModel Core::compile_model(const std::string& model_path, const std::stri
     });
 }
 
+#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+CompiledModel Core::compile_model(const std::wstring& model_path,
+                                  const std::string& device_name,
+                                  const AnyMap& config) {
+    return compile_model(ov::util::wstring_to_string(model_path), device_name, config);
+}
+#endif
+
 CompiledModel Core::compile_model(const std::string& model,
                                   const ov::Tensor& weights,
                                   const std::string& device_name,
diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp
index 96b09fe68f8889..e5421ea7b8c977 100644
--- a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp
@@ -17,6 +17,7 @@
 # define GTEST_COUT std::cerr << "[ ] [ INFO ] "
 # include
 # include
+# include "openvino/pass/manager.hpp"
 #endif
 
 namespace ov {
@@ -203,6 +204,185 @@ TEST(OVClassBasicTest, smoke_createMockEngineConfigThrows) {
 }
 
 #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+inline void generateModelFile() {
+    ov::pass::Manager manager;
+    manager.register_pass<ov::pass::Serialize>("test_model.xml", "test_model.bin");
+    auto function = ngraph::builder::subgraph::makeConvPoolReluNoReshapes({1, 3, 227, 227});
+    manager.run_passes(function);
+}
+
+TEST(OVClassBasicTest, compile_model_no_property_unicode) {
+    std::string model_xml_name = "test_model.xml";
+    std::string model_bin_name = "test_model.bin";
+    generateModelFile();
+    for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) {
+        std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex];
+        std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix);
+        std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix);
+        GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl;
+
+        try {
+            bool is_copy_successfully;
+            is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW);
+            if (!is_copy_successfully) {
+                FAIL() << "Unable to copy from '" << model_xml_name << "' to '"
+                       << ::ov::util::wstring_to_string(modelXmlPathW) << "'";
+            }
+
+            is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW);
+            if (!is_copy_successfully) {
+                FAIL() << "Unable to copy from '" << model_bin_name << "' to '"
+                       << ::ov::util::wstring_to_string(modelBinPathW) << "'";
+            }
+
+            ov::Core core = createCoreWithTemplate();
+
+            OV_ASSERT_NO_THROW(core.compile_model(modelXmlPathW));
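+            // The std::wstring path feeds the new wide-string overload directly;
+            // the wstring_to_string conversion now happens once inside
+            // Core::compile_model (see the core.cpp hunk above) instead of at
+            // every call site.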
CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} + +TEST(OVClassBasicTest, compile_model_with_property_unicode) { + std::string model_xml_name = "test_model.xml"; + std::string model_bin_name = "test_model.bin"; + generateModelFile(); + for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) { + std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex]; + std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix); + std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix); + GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl; + + try { + bool is_copy_successfully; + is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_xml_name << "' to '" + << ::ov::util::wstring_to_string(modelXmlPathW) << "'"; + } + + is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_bin_name << "' to '" + << ::ov::util::wstring_to_string(modelBinPathW) << "'"; + } + + ov::Core core = createCoreWithTemplate(); + + OV_ASSERT_NO_THROW( + core.compile_model(modelXmlPathW, ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY))); + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} + +TEST_P(OVClassBasicTestP, compile_model_with_device_no_property_unicode) { + std::string model_xml_name = "test_model.xml"; + std::string model_bin_name = "test_model.bin"; + generateModelFile(); + for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) { + std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex]; + std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix); + std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix); + GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl; + try { + bool is_copy_successfully; + is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_xml_name << "' to '" + << ::ov::util::wstring_to_string(modelXmlPathW) << "'"; + } + + is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_bin_name << "' to '" + << ::ov::util::wstring_to_string(modelBinPathW) 
<< "'"; + } + + ov::Core core = createCoreWithTemplate(); + + OV_ASSERT_NO_THROW(core.compile_model(modelXmlPathW, target_device)); + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} + +TEST_P(OVClassBasicTestP, compile_model_with_device_with_property_unicode) { + std::string model_xml_name = "test_model.xml"; + std::string model_bin_name = "test_model.bin"; + generateModelFile(); + for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) { + std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex]; + std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix); + std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix); + GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl; + + try { + bool is_copy_successfully; + is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_xml_name << "' to '" + << ::ov::util::wstring_to_string(modelXmlPathW) << "'"; + } + + is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_bin_name << "' to '" + << ::ov::util::wstring_to_string(modelBinPathW) << "'"; + } + + ov::Core core = createCoreWithTemplate(); + + OV_ASSERT_NO_THROW(core.compile_model(modelXmlPathW, + target_device, + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY))); + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} +#endif + +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPOR TEST_P(OVClassBasicTestP, smoke_registerPluginsXMLUnicodePath) { const std::string pluginXML = getPluginFile(); From a204b04faed3e482cafb7a929f93b876e487c207 Mon Sep 17 00:00:00 2001 From: River Li Date: Wed, 22 Mar 2023 13:45:03 +0800 Subject: [PATCH 023/296] fix mem leak (#16456) --- samples/c/hello_classification/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/c/hello_classification/main.c b/samples/c/hello_classification/main.c index b3748356ad2fd0..4fbf0c5f7a99b6 100644 --- a/samples/c/hello_classification/main.c +++ b/samples/c/hello_classification/main.c @@ -72,6 +72,7 @@ struct infer_result* tensor_to_infer_result(ov_tensor_t* tensor, size_t* result_ results[i].probability = float_data[i]; } + ov_shape_free(&output_shape); return results; } From a71c83d366b98953c702a50725e3fdcd95dc95e0 Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Wed, 22 Mar 2023 15:15:02 +0900 Subject: [PATCH 024/296] [GPU] Resolve eltwise kernel build failure (#16458) 
Signed-off-by: Min, Byungil --- .../eltwise/eltwise_kernel_blocked_opt.cpp | 44 +++++++++---------- .../tests/test_cases/eltwise_gpu_test.cpp | 7 ++- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp index ed520fb4596e44..0ef8692bb348c2 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp @@ -12,9 +12,9 @@ namespace kernel_selector { static inline bool InputHasFeatureBroadcast(const eltwise_params& params, const size_t op_num, const size_t input_idx); static inline bool IsBroadcastingPossibleInput(const DataTensor& input, const DataTensor& output); -static inline int SelectVecSizeFromFormat(const eltwise_params& params, size_t index); -static inline int GetInnerFeatureBlockSize(const eltwise_params& arg, size_t index); -static inline int GetInnerBatchBlockSize(const eltwise_params& arg, size_t index); +static inline int SelectVecSizeFromFormat(const DataTensor&); +static inline int GetInnerFeatureBlockSize(const DataTensor&); +static inline int GetInnerBatchBlockSize(const DataTensor&); static inline size_t CalculateTotalWorkItemCount(const eltwise_params& params); @@ -105,13 +105,13 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params, const optional_pa return false; for (size_t i = 0; i < ewParams.inputs.size(); i++) { - if ((SelectVecSizeFromFormat(ewParams, i) == 1) && + if ((SelectVecSizeFromFormat(ewParams.inputs[i]) == 1) && !IsBroadcastingPossibleInput(ewParams.inputs[i], ewParams.outputs[0])) { return false; } } - const auto vec_size = SelectVecSizeFromFormat(ewParams, 0); + const auto vec_size = SelectVecSizeFromFormat(ewParams.outputs[0]); const auto input0 = ewParams.inputs[0]; const auto& output = ewParams.outputs[0]; // Check that padding before features doesn't mis-align the blocks @@ -148,7 +148,7 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params, const optional_pa } JitConstants EltwiseKernel_blocked_opt::MakeLoadJitConstants(const eltwise_params& params, bool /*use_vload*/) const { - const auto vec_size = SelectVecSizeFromFormat(params, 0); + const auto vec_size = SelectVecSizeFromFormat(params.outputs[0]); JitConstants jit = {}; std::string vload_decls; @@ -179,7 +179,7 @@ JitConstants EltwiseKernel_blocked_opt::MakeLoadJitConstants(const eltwise_param bool feature_broadcasting = (params.inputs[input_idx].Feature().v == 1 && params.outputs[0].Feature().v != 1); bool spatial_broadcasting = (params.inputs[input_idx].LogicalSize() == params.outputs[0].Feature().v && params.inputs[input_idx].LogicalSize() == params.inputs[input_idx].Feature().v && - GetInnerBatchBlockSize(params, input_idx) == 1 && !Padded(params.inputs[input_idx])); + GetInnerBatchBlockSize(params.inputs[input_idx]) == 1 && !Padded(params.inputs[input_idx])); bool full_tensor = (params.inputs[input_idx].LogicalSize() == params.outputs[0].LogicalSize() && !Padded(params.inputs[input_idx])); // Based on dimension, get a string of indexing for formmatted GET_INDEX @@ -278,9 +278,9 @@ JitConstants EltwiseKernel_blocked_opt::MakeLoadJitConstants(const eltwise_param JitConstants EltwiseKernel_blocked_opt::GetJitConstants(const eltwise_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); - const auto vec_size = 
SelectVecSizeFromFormat(params, 0); - const auto inner_feature_blk_size = GetInnerFeatureBlockSize(params, 0); - const auto inner_batch_blk_size = GetInnerBatchBlockSize(params, 0); + const auto vec_size = SelectVecSizeFromFormat(params.outputs[0]); + const auto inner_feature_blk_size = GetInnerFeatureBlockSize(params.outputs[0]); + const auto inner_batch_blk_size = GetInnerBatchBlockSize(params.outputs[0]); jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); jit.AddConstant(MakeJitConstant("BLOCK_SIZE", vec_size)); @@ -376,7 +376,7 @@ EltwiseKernelBase::DispatchData EltwiseKernel_blocked_opt::SetDefault(const eltw // so that each global id can be an index of each work group. // It also makes an index for fomatted GET_INDEX macro if needed(e.g. feature broadcasting, fusing). KernelData kd = KernelData::Default(params); - dispatchData.gws = {std::max(CalculateTotalWorkItemCount(params) / SelectVecSizeFromFormat(params, 0), (size_t)1), 1, 1}; + dispatchData.gws = {std::max(CalculateTotalWorkItemCount(params) / SelectVecSizeFromFormat(params.outputs[0]), (size_t)1), 1, 1}; dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); return dispatchData; @@ -384,8 +384,8 @@ EltwiseKernelBase::DispatchData EltwiseKernel_blocked_opt::SetDefault(const eltw // Local static inline size_t CalculateTotalWorkItemCount(const eltwise_params& params) { - auto feature = Align(params.outputs[0].Feature().v, GetInnerFeatureBlockSize(params, 0)); - auto batch = Align(params.outputs[0].Batch().v, GetInnerBatchBlockSize(params, 0)); + auto feature = Align(params.outputs[0].Feature().v, GetInnerFeatureBlockSize(params.outputs[0])); + auto batch = Align(params.outputs[0].Batch().v, GetInnerBatchBlockSize(params.outputs[0])); size_t spatial = 0; if (DataTensor::ChannelsCount(params.outputs[0].GetLayout()) == 5) spatial = params.outputs[0].X().v * params.outputs[0].Y().v * params.outputs[0].Z().v; @@ -395,10 +395,10 @@ static inline size_t CalculateTotalWorkItemCount(const eltwise_params& params) { return (feature * batch * spatial); } -static inline int SelectVecSizeFromFormat(const eltwise_params& arg, size_t index) { +static inline int SelectVecSizeFromFormat(const DataTensor& tensor) { // No feature inner block : not acceptable for calculation of ordered index - auto in_layout = arg.inputs[index].GetLayout(); - switch (in_layout) { + auto layout = tensor.GetLayout(); + switch (layout) { case DataLayout::b_fs_yx_fsv4: return 4; case DataLayout::b_fs_yx_fsv16: @@ -419,9 +419,9 @@ static inline int SelectVecSizeFromFormat(const eltwise_params& arg, size_t inde } } -static inline int GetInnerBatchBlockSize(const eltwise_params& arg, size_t index) { - auto in_layout = arg.inputs[index].GetLayout(); - switch (in_layout) { +static inline int GetInnerBatchBlockSize(const DataTensor& tensor) { + auto layout = tensor.GetLayout(); + switch (layout) { case DataLayout::b_fs_yx_fsv4: case DataLayout::b_fs_yx_fsv16: case DataLayout::b_fs_zyx_fsv16: @@ -445,9 +445,9 @@ static inline int GetInnerBatchBlockSize(const eltwise_params& arg, size_t index return 1; } -static inline int GetInnerFeatureBlockSize(const eltwise_params& arg, size_t index) { - auto in_layout = arg.inputs[index].GetLayout(); - switch (in_layout) { +static inline int GetInnerFeatureBlockSize(const DataTensor& tensor) { + auto layout = tensor.GetLayout(); + switch (layout) { case DataLayout::b_fs_yx_fsv4: return 4; case DataLayout::b_fs_yx_fsv16: diff --git 
a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp index e3e9d06c3ce1a7..b1721f2b7f1ce3 100644 --- a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp @@ -4331,10 +4331,11 @@ struct eltwise_random_test_param_generator : std::vector Date: Wed, 22 Mar 2023 07:22:44 +0100 Subject: [PATCH 025/296] [PyOV] Align Python API's attributes and methods between its modules (#15889) * Complete alignment * Minor change * Apply discussion results * Apply discussion comments * Clang * Apply CR * Code style --- .../python/src/pyopenvino/core/core.cpp | 15 +++++ .../src/pyopenvino/core/infer_request.cpp | 37 ++++++----- .../python/src/pyopenvino/graph/dimension.cpp | 17 +++++ .../pyopenvino/graph/discrete_type_info.cpp | 6 +- .../python/src/pyopenvino/graph/model.cpp | 32 ++++++++++ .../pyopenvino/graph/types/element_type.cpp | 8 +++ .../python/tests/test_runtime/test_core.py | 12 ++-- .../tests/test_runtime/test_dimension.py | 63 +++++++++++++++++++ .../tests/test_runtime/test_infer_request.py | 16 +++-- .../python/tests/test_runtime/test_model.py | 57 ++++++++++++----- .../python/tests/test_runtime/test_type.py | 16 +++++ .../tests/test_utils/test_data_dispatch.py | 4 +- 12 files changed, 241 insertions(+), 42 deletions(-) create mode 100644 src/bindings/python/tests/test_runtime/test_dimension.py diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp index ef94b298c9480f..c110dcd5bd7776 100644 --- a/src/bindings/python/src/pyopenvino/core/core.cpp +++ b/src/bindings/python/src/pyopenvino/core/core.cpp @@ -583,6 +583,21 @@ void regclass_Core(py::module m) { :type extensions: list[openvino.runtime.Extension] )"); + cls.def("get_available_devices", + &ov::Core::get_available_devices, + py::call_guard(), + R"( + Returns devices available for inference Core objects goes over all registered plugins. + + GIL is released while running this function. + + :returns: A list of devices. The devices are returned as: CPU, GPU.0, GPU.1, GNA... + If there more than one device of specific type, they are enumerated with .# suffix. + Such enumerated device can later be used as a device name in all Core methods like: + compile_model, query_model, set_property and so on. + :rtype: list + )"); + cls.def_property_readonly("available_devices", &ov::Core::get_available_devices, py::call_guard(), diff --git a/src/bindings/python/src/pyopenvino/core/infer_request.cpp b/src/bindings/python/src/pyopenvino/core/infer_request.cpp index eb71fd7f953bcf..585441569f9e77 100644 --- a/src/bindings/python/src/pyopenvino/core/infer_request.cpp +++ b/src/bindings/python/src/pyopenvino/core/infer_request.cpp @@ -12,6 +12,7 @@ #include "pyopenvino/core/common.hpp" #include "pyopenvino/core/containers.hpp" +#include "pyopenvino/utils/utils.hpp" PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); @@ -647,21 +648,29 @@ void regclass_InferRequest(py::module m) { :rtype: List[openvino.runtime.ConstOutput] )"); - cls.def_property_readonly("inputs", - &InferRequestWrapper::get_input_tensors, - R"( - Gets all input tensors of this InferRequest. 
- - :rtype: List[openvino.runtime.Tensor] - )"); + cls.def_property_readonly( + "inputs", + [](InferRequestWrapper& self) { + Common::utils::deprecation_warning("inputs", "2024.0", "Please use 'input_tensors' property instead."); + return self.get_input_tensors(); + }, + R"( + Gets all input tensors of this InferRequest. + + :rtype: List[openvino.runtime.Tensor] + )"); - cls.def_property_readonly("outputs", - &InferRequestWrapper::get_output_tensors, - R"( - Gets all output tensors of this InferRequest. - - :rtype: List[openvino.runtime.Tensor] - )"); + cls.def_property_readonly( + "outputs", + [](InferRequestWrapper& self) { + Common::utils::deprecation_warning("outputs", "2024.0", "Please use 'output_tensors' property instead."); + return self.get_output_tensors(); + }, + R"( + Gets all output tensors of this InferRequest. + + :rtype: List[openvino.runtime.Tensor] + )"); cls.def_property_readonly("input_tensors", &InferRequestWrapper::get_input_tensors, diff --git a/src/bindings/python/src/pyopenvino/graph/dimension.cpp b/src/bindings/python/src/pyopenvino/graph/dimension.cpp index fc98155c95ae36..b8e6241bad4a07 100644 --- a/src/bindings/python/src/pyopenvino/graph/dimension.cpp +++ b/src/bindings/python/src/pyopenvino/graph/dimension.cpp @@ -91,6 +91,15 @@ void regclass_graph_Dimension(py::module m) { Return this dimension's min_dimension as integer. This dimension must be dynamic and non-negative. + :return: Value of the dimension. + :rtype: int + )"); + dim.def_property_readonly("min_length", + &ov::Dimension::get_min_length, + R"( + Return this dimension's min_dimension as integer. + This dimension must be dynamic and non-negative. + :return: Value of the dimension. :rtype: int )"); @@ -103,7 +112,15 @@ void regclass_graph_Dimension(py::module m) { :return: Value of the dimension. :rtype: int )"); + dim.def_property_readonly("max_length", + &ov::Dimension::get_max_length, + R"( + Return this dimension's max_dimension as integer. + This dimension must be dynamic and non-negative. + :return: Value of the dimension. 
+ :rtype: int + )"); dim.def("same_scheme", &ov::Dimension::same_scheme, py::arg("dim"), diff --git a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp index 74b81eb62a45cf..dca43473035be7 100644 --- a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp +++ b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp @@ -9,6 +9,7 @@ #include #include "openvino/core/type.hpp" +#include "pyopenvino/utils/utils.hpp" namespace py = pybind11; @@ -29,7 +30,10 @@ void regclass_graph_DiscreteTypeInfo(py::module m) { discrete_type_info.def_readonly("version_id", &ov::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ov::DiscreteTypeInfo::parent); - discrete_type_info.def("get_version", &ov::DiscreteTypeInfo::get_version); + discrete_type_info.def("get_version", []() { + Common::utils::deprecation_warning("get_version()", "2024.0", "Please use version attribute instead."); + return &ov::DiscreteTypeInfo::get_version; + }); discrete_type_info.def("hash", [](const ov::DiscreteTypeInfo& self) { return self.hash(); }); diff --git a/src/bindings/python/src/pyopenvino/graph/model.cpp b/src/bindings/python/src/pyopenvino/graph/model.cpp index 6c898713f0647e..f95801bdb11258 100644 --- a/src/bindings/python/src/pyopenvino/graph/model.cpp +++ b/src/bindings/python/src/pyopenvino/graph/model.cpp @@ -480,6 +480,14 @@ void regclass_graph_Model(py::module m) { :return: ParameterVector containing model parameters. :rtype: ParameterVector )"); + model.def_property_readonly("parameters", + &ov::Model::get_parameters, + R"( + Return the model parameters. + + :return: ParameterVector containing model parameters. + :rtype: ParameterVector + )"); model.def("get_results", &ov::Model::get_results, R"( @@ -488,6 +496,14 @@ void regclass_graph_Model(py::module m) { :return: ResultVector containing model parameters. :rtype: ResultVector )"); + model.def_property_readonly("results", + &ov::Model::get_results, + R"( + Return a list of model outputs. + + :return: ResultVector containing model parameters. + :rtype: ResultVector + )"); model.def("get_result", &ov::Model::get_result, R"( @@ -496,6 +512,14 @@ void regclass_graph_Model(py::module m) { :return: Node object representing result. :rtype: openvino.runtime.Node )"); + model.def_property_readonly("result", + &ov::Model::get_result, + R"( + Return single result. + + :return: Node object representing result. + :rtype: openvino.runtime.Node + )"); model.def("get_result_index", (int64_t(ov::Model::*)(const ov::Output&) const) & ov::Model::get_result_index, py::arg("value"), @@ -561,6 +585,14 @@ void regclass_graph_Model(py::module m) { :rtype: bool )"); + model.def_property_readonly("dynamic", + &ov::Model::is_dynamic, + R"( + Returns true if any of the op's defined in the model + contains partial shape. 
+ + :rtype: bool + )"); model.def("input", (ov::Output(ov::Model::*)()) & ov::Model::input); model.def("input", (ov::Output(ov::Model::*)(size_t)) & ov::Model::input, py::arg("index")); diff --git a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp index 0123dd780e2c1c..41524dfd1b6bad 100644 --- a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp +++ b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp @@ -68,11 +68,17 @@ void regclass_graph_Type(py::module m) { type.def("is_static", &ov::element::Type::is_static); type.def("is_dynamic", &ov::element::Type::is_dynamic); type.def("is_real", &ov::element::Type::is_real); + type.def_property_readonly("real", &ov::element::Type::is_real); type.def("is_integral", &ov::element::Type::is_integral); + type.def_property_readonly("integral", &ov::element::Type::is_integral); type.def("is_integral_number", &ov::element::Type::is_integral_number); + type.def_property_readonly("integral_number", &ov::element::Type::is_integral_number); type.def("is_signed", &ov::element::Type::is_signed); + type.def_property_readonly("signed", &ov::element::Type::is_signed); type.def("is_quantized", &ov::element::Type::is_quantized); + type.def_property_readonly("quantized", &ov::element::Type::is_quantized); type.def("get_type_name", &ov::element::Type::get_type_name); + type.def_property_readonly("type_name", &ov::element::Type::get_type_name); type.def("compatible", &ov::element::Type::compatible, py::arg("other"), @@ -121,5 +127,7 @@ void regclass_graph_Type(py::module m) { )"); type.def_property_readonly("size", &ov::element::Type::size); + type.def("get_size", &ov::element::Type::size); type.def_property_readonly("bitwidth", &ov::element::Type::bitwidth); + type.def("get_bitwidth", &ov::element::Type::bitwidth); } diff --git a/src/bindings/python/tests/test_runtime/test_core.py b/src/bindings/python/tests/test_runtime/test_core.py index 15c6a2ed553eb1..87709aa443316c 100644 --- a/src/bindings/python/tests/test_runtime/test_core.py +++ b/src/bindings/python/tests/test_runtime/test_core.py @@ -176,11 +176,13 @@ def test_get_version(device): def test_available_devices(device): core = Core() - devices = core.available_devices - assert device in devices, ( - f"Current device '{device}' is not listed in " - f"available devices '{', '.join(devices)}'" - ) + devices_attr = core.available_devices + devices_method = core.get_available_devices() + for devices in (devices_attr, devices_method): + assert device in devices, ( + f"Current device '{device}' is not listed in " + f"available devices '{', '.join(devices)}'" + ) def test_get_property(device): diff --git a/src/bindings/python/tests/test_runtime/test_dimension.py b/src/bindings/python/tests/test_runtime/test_dimension.py new file mode 100644 index 00000000000000..697e11555590b0 --- /dev/null +++ b/src/bindings/python/tests/test_runtime/test_dimension.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.runtime import Dimension + + +def test_dynamic_dimension(): + dim = Dimension() + assert dim.is_dynamic + assert str(dim) == "?" + assert dim.to_string() == "?" 
+    assert str(dim.__repr__) == "<bound method PyCapsule.__repr__ of <Dimension: ?>>"
+
+
+def test_dynamic_dimension_with_bounds():
+    dim = Dimension(2, 5)
+    assert str(dim) == "2..5"
+    assert dim.to_string() == "2..5"
+    assert not dim.is_static
+    assert dim.is_dynamic
+    assert dim.get_min_length() == 2
+    assert dim.min_length == 2
+    assert dim.get_max_length() == 5
+    assert dim.max_length == 5
+
+
+def test_static_dimension():
+    dim = Dimension(2)
+    assert str(dim) == "2"
+    assert dim.to_string() == "2"
+    assert dim.is_static
+    assert not dim.is_dynamic
+    assert len(dim) == 2
+    assert dim.get_length() == 2
+
+
+def test_dim_same_scheme():
+    assert Dimension().same_scheme(Dimension()) is True
+    assert Dimension(3).same_scheme(Dimension(3)) is True
+    assert Dimension(3).same_scheme(Dimension(4)) is False
+    assert Dimension().same_scheme(Dimension(4)) is False
+
+
+def test_dim_compatible():
+    assert Dimension().compatible(Dimension()) is True
+    assert Dimension(3).compatible(Dimension(3)) is True
+    assert Dimension(3).compatible(Dimension(4)) is False
+    assert Dimension().compatible(Dimension(4)) is True
+
+
+def test_dim_relax():
+    assert Dimension().relaxes(Dimension()) is True
+    assert Dimension(3).relaxes(Dimension(3)) is True
+    assert Dimension(3).relaxes(Dimension(4)) is False
+    assert Dimension().relaxes(Dimension(4)) is True
+
+
+def test_dim_refine():
+    assert Dimension().refines(Dimension()) is True
+    assert Dimension(3).refines(Dimension(3)) is True
+    assert Dimension(3).refines(Dimension(4)) is False
+    assert Dimension().refines(Dimension(4)) is False
diff --git a/src/bindings/python/tests/test_runtime/test_infer_request.py b/src/bindings/python/tests/test_runtime/test_infer_request.py
index 4540e9d7ebe8d9..4755fde6a77ca5 100644
--- a/src/bindings/python/tests/test_runtime/test_infer_request.py
+++ b/src/bindings/python/tests/test_runtime/test_infer_request.py
@@ -267,7 +267,7 @@ def test_batched_tensors(device):
         assert np.array_equal(actual[idx], _tmp)
 
 
-def test_inputs_outputs_property(device):
+def test_inputs_outputs_property_and_method(device):
     num_inputs = 10
     input_shape = [1]
     params = [ops.parameter(input_shape, np.uint8) for _ in range(num_inputs)]
@@ -277,10 +277,14 @@ def test_inputs_outputs_property(device):
     request = compiled_model.create_infer_request()
     data = [np.atleast_1d(i) for i in range(num_inputs)]
     results = request.infer(data).values()
-    for result, output_tensor in zip(results, request.outputs):
+    for result, output_tensor in zip(results, request.output_tensors):
         assert np.array_equal(result, output_tensor.data)
-    for input_data, input_tensor in zip(data, request.inputs):
+    for input_data, input_tensor in zip(data, request.input_tensors):
         assert np.array_equal(input_data, input_tensor.data)
+    for input_tensor in request.input_tensors:
+        assert list(input_tensor.get_shape()) == input_shape
+    for output_tensor in request.output_tensors:
+        assert list(output_tensor.get_shape()) == input_shape
 
 
 @pytest.mark.skip(reason="Sporadically failed. Need further investigation.
Ticket - 95967") @@ -395,7 +399,7 @@ def test_infer_mixed_values(device, ov_type, numpy_dtype, shared_flag): request.infer([tensor1, array1], shared_memory=shared_flag) - assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1))) + assert np.array_equal(request.output_tensors[0].data, np.concatenate((tensor1.data, array1))) @pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ @@ -419,8 +423,7 @@ def test_async_mixed_values(device, ov_type, numpy_dtype, shared_flag): request.start_async([tensor1, array1], shared_memory=shared_flag) request.wait() - - assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1))) + assert np.array_equal(request.output_tensors[0].data, np.concatenate((tensor1.data, array1))) @pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ @@ -512,6 +515,7 @@ def test_infer_queue_iteration(device): it = iter(infer_queue) infer_request = next(it) assert isinstance(infer_request, InferRequest) + assert infer_request.userdata is None with pytest.raises(StopIteration): next(it) diff --git a/src/bindings/python/tests/test_runtime/test_model.py b/src/bindings/python/tests/test_runtime/test_model.py index b13e5015275403..20f58201ee84e3 100644 --- a/src/bindings/python/tests/test_runtime/test_model.py +++ b/src/bindings/python/tests/test_runtime/test_model.py @@ -48,8 +48,10 @@ def test_function_add_outputs_tensor_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs("relu_t1") assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert "relu_t1" in function.outputs[1].get_tensor().names assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() @@ -64,8 +66,10 @@ def test_function_add_outputs_op_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(("relu1", 0)) assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -78,9 +82,9 @@ def test_function_add_output_port(): relu1.get_output_tensor(0).set_names({"relu_t1"}) relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") - assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(relu1.output(0)) - assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -94,6 +98,7 @@ def test_function_add_output_incorrect_tensor_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: function.add_outputs("relu_t") # Verify that absent output name is present in error message @@ -108,6 +113,7 @@ def test_function_add_output_incorrect_idx(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: 
function.add_outputs(("relu1", 1234)) # Verify that op name and port number are present in error message @@ -123,6 +129,7 @@ def test_function_add_output_incorrect_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: function.add_outputs(("relu_1", 0)) # Verify that absent op name is present in error message @@ -139,8 +146,10 @@ def test_add_outputs_several_tensors(): relu3 = ops.relu(relu2, name="relu3") function = Model(relu3, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(["relu_t1", "relu_t2"]) assert len(function.get_results()) == 3 + assert len(function.results) == 3 assert len(new_outs) == 2 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -158,8 +167,10 @@ def test_add_outputs_several_ports(): relu3 = ops.relu(relu2, name="relu3") function = Model(relu3, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs([("relu1", 0), ("relu2", 0)]) assert len(function.get_results()) == 3 + assert len(function.results) == 3 assert len(new_outs) == 2 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -175,6 +186,7 @@ def test_add_outputs_incorrect_value(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(TypeError) as e: function.add_outputs(0) assert "Incorrect type of a value to add as output." 
in str(e.value) @@ -187,6 +199,7 @@ def test_add_outputs_incorrect_outputs_list(): relu1.get_output_tensor(0).set_names({"relu_t1"}) function = Model(relu1, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(TypeError) as e: function.add_outputs([0, 0]) assert "Incorrect type of a value to add as output at index 0" in str(e.value) @@ -283,6 +296,9 @@ def test_get_batch(): param = model.get_parameters()[0] param.set_layout(Layout("NC")) assert get_batch(model) == 2 + param = model.parameters[0] + param.set_layout(Layout("NC")) + assert get_batch(model) == 2 def test_get_batch_chwn(): @@ -292,41 +308,53 @@ def test_get_batch_chwn(): add = ops.add(param1, param2) add2 = ops.add(add, param3) model = Model(add2, [param1, param2, param3], "TestFunction") - param = model.get_parameters()[0] - param.set_layout(Layout("CHWN")) + param_method = model.get_parameters()[0] + param_attr = model.parameters[0] + param_method.set_layout(Layout("CHWN")) + param_attr.set_layout(Layout("CHWN")) assert get_batch(model) == 4 def test_set_batch_dimension(): model = generate_add_model() - model_param1 = model.get_parameters()[0] - model_param2 = model.get_parameters()[1] + model_param1_method = model.get_parameters()[0] + model_param2_method = model.get_parameters()[1] + model_param1_attr = model.parameters[0] + model_param2_attr = model.parameters[1] # check batch == 2 - model_param1.set_layout(Layout("NC")) + model_param1_method.set_layout(Layout("NC")) + model_param1_attr.set_layout(Layout("NC")) assert get_batch(model) == 2 # set batch to 1 set_batch(model, Dimension(1)) assert get_batch(model) == 1 # check if shape of param 1 has changed - assert model_param1.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_method.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_attr.get_output_shape(0) == PartialShape([1, 1]) # check if shape of param 2 has not changed - assert model_param2.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_method.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_attr.get_output_shape(0) == PartialShape([2, 1]) def test_set_batch_int(): model = generate_add_model() - model_param1 = model.get_parameters()[0] - model_param2 = model.get_parameters()[1] + model_param1_method = model.get_parameters()[0] + model_param2_method = model.get_parameters()[1] + model_param1_attr = model.parameters[0] + model_param2_attr = model.parameters[1] # check batch == 2 - model_param1.set_layout(Layout("NC")) + model_param1_method.set_layout(Layout("NC")) + model_param1_attr.set_layout(Layout("NC")) assert get_batch(model) == 2 # set batch to 1 set_batch(model, 1) assert get_batch(model) == 1 # check if shape of param 1 has changed - assert model_param1.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_method.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_attr.get_output_shape(0) == PartialShape([1, 1]) # check if shape of param 2 has not changed - assert model_param2.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_method.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_attr.get_output_shape(0) == PartialShape([2, 1]) def test_set_batch_default_batch_size(): @@ -335,6 +363,7 @@ def test_set_batch_default_batch_size(): model_param1.set_layout(Layout("NC")) set_batch(model) assert model.is_dynamic() + assert model.dynamic def test_reshape_with_ports(): diff --git 
a/src/bindings/python/tests/test_runtime/test_type.py b/src/bindings/python/tests/test_runtime/test_type.py index b31f36edf9f7e2..28ed0708d7f3e0 100644 --- a/src/bindings/python/tests/test_runtime/test_type.py +++ b/src/bindings/python/tests/test_runtime/test_type.py @@ -64,11 +64,18 @@ def test_basic_ovtypes(ovtype, assert ovtype.is_static() is static_flag assert ovtype.is_dynamic() is dynamic_flag assert ovtype.is_real() is real_flag + assert ovtype.real is real_flag assert ovtype.is_integral() is integral_flag + assert ovtype.integral is integral_flag assert ovtype.is_signed() is signed_flag + assert ovtype.signed is signed_flag assert ovtype.is_quantized() is quantized_flag + assert ovtype.quantized is quantized_flag assert ovtype.get_type_name() == type_name + assert ovtype.type_name == type_name + assert ovtype.get_size() == type_size assert ovtype.size == type_size + assert ovtype.get_bitwidth() == type_bitwidth assert ovtype.bitwidth == type_bitwidth @@ -77,15 +84,22 @@ def test_undefined_ovtype(): assert ov_type.is_static() is True assert ov_type.is_dynamic() is False assert ov_type.is_real() is False + assert ov_type.real is False assert ov_type.is_integral() is True + assert ov_type.integral is True assert ov_type.is_signed() is False + assert ov_type.signed is False assert ov_type.is_quantized() is False + assert ov_type.quantized is False assert ov_type.get_type_name() == "undefined" + assert ov_type.type_name == "undefined" + assert ov_type.get_size() == 0 assert ov_type.size == 0 # Note: might depend on the system import sys assert ov_type.bitwidth == sys.maxsize * 2 + 1 + assert ov_type.get_bitwidth() == sys.maxsize * 2 + 1 def test_dynamic_ov_type(): @@ -98,7 +112,9 @@ def test_dynamic_ov_type(): assert ov_type.is_quantized() is False assert ov_type.get_type_name() == "dynamic" assert ov_type.size == 0 + assert ov_type.get_size() == 0 assert ov_type.bitwidth == 0 + assert ov_type.get_bitwidth() == 0 @pytest.mark.parametrize(("ovtype_one", "ovtype_two", "expected"), [ diff --git a/src/bindings/python/tests/test_utils/test_data_dispatch.py b/src/bindings/python/tests/test_utils/test_data_dispatch.py index 254cf890458bb8..fad863f61a52e8 100644 --- a/src/bindings/python/tests/test_utils/test_data_dispatch.py +++ b/src/bindings/python/tests/test_utils/test_data_dispatch.py @@ -157,8 +157,8 @@ def test_ndarray_copied_dispatcher(device, input_shape): result, infer_request = _run_dispatcher(device, test_data, False, input_shape) assert result == {} - assert np.array_equal(infer_request.inputs[0].data, test_data) + assert np.array_equal(infer_request.input_tensors[0].data, test_data) test_data[0] = 2.0 - assert not np.array_equal(infer_request.inputs[0].data, test_data) + assert not np.array_equal(infer_request.input_tensors[0].data, test_data) From 04a2c4ce61917a381cc963e8b2c783ac0b62dcba Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Wed, 22 Mar 2023 16:38:28 +0900 Subject: [PATCH 026/296] [GPU] Add shape agnostic optimized FullyConnectedIMAD kernel (#16417) * [GPU] Added shape agnostic kernel for fully_connected_gpu_imad Signed-off-by: Andrew Park * Add fully_connected_gpu_imad shape agnostic TCs for ov_gpu_unit_tests Signed-off-by: Andrew Park * Apply comments Signed-off-by: Andrew Park --------- Signed-off-by: Andrew Park --- .../cl_kernels/fully_connected_gpu_imad.cl | 64 ++++++++++++++--- .../fully_connected_kernel_imad.cpp | 71 +++++++++++-------- .../test_cases/fully_connected_gpu_test.cpp | 25 +++++++ 3 files changed, 121 insertions(+), 39 deletions(-) diff 
--git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl index 4fb15bdcc06d8a..499c153da69c0b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl @@ -17,6 +17,7 @@ REQD_SUB_GROUP_SIZE(SIMD_SIZE) KERNEL(fully_connected_gpu_imad)( + OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, const __global FILTER_TYPE* weights @@ -36,19 +37,34 @@ KERNEL(fully_connected_gpu_imad)( #if HAS_OFM_LEFTOVERS || HAS_IFM_LEFTOVERS const uint sglid = get_sub_group_local_id(); #endif +#if IS_DYNAMIC + // In dynamic kernel, TILE_BATCH is set to the initial tile batch size for stack arrays such as dotProd + // and tile_batch is calculated as an adjusted value from tile_batch_max_size by given global work size +#if OUTPUT_3D + const uint tile_batch = OUTPUT_FEATURE_NUM / (uint)get_global_size(2); +#else + const uint tile_batch = OUTPUT_BATCH_NUM / (uint)get_global_size(1); +#endif +#else + const uint tile_batch = TILE_BATCH; +#endif #if OUTPUT_3D const uint batch = (uint)get_global_id(1); - const uint skip_f = (uint)get_global_id(2) * TILE_BATCH; + const uint skip_f = (uint)get_global_id(2) * tile_batch; #else - const uint batch = (uint)get_global_id(1) * TILE_BATCH; + const uint batch = (uint)get_global_id(1) * tile_batch; const uint skip_f = (uint)get_global_id(2); #endif // Accumulators initialization MAKE_VECTOR_TYPE(int, TILE_OFM) dotProd[TILE_BATCH]; MAKE_VECTOR_TYPE(uint, TILE_OFM) idx_w; - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { dotProd[ob_idx][of_idx] = 0; #if !HAS_IFM_LEFTOVERS @@ -103,7 +119,11 @@ KERNEL(fully_connected_gpu_imad)( #endif // HAS_OFM_LEFTOVERS } - unroll_for(uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { + #if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #else + unroll_for(uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #endif // Loading inputs #if OUTPUT_3D __global INPUT0_TYPE* current_input = &input[INPUT0_GET_INDEX(batch, skip_f + ob_idx, 0, 0)]; @@ -172,7 +192,11 @@ KERNEL(fully_connected_gpu_imad)( } } - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif // Loading inputs #if OUTPUT_3D __global INPUT0_TYPE* current_input = &input[INPUT0_GET_INDEX(batch, skip_f + ob_idx, 0, 0)]; @@ -221,7 +245,11 @@ KERNEL(fully_connected_gpu_imad)( #if BIAS_TERM #if BIAS_PER_OUTPUT MAKE_VECTOR_TYPE(uint, TILE_OFM) bias_index[TILE_BATCH]; - unroll_for(uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { + #if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #else + unroll_for(uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #endif unroll_for (uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if OUTPUT_3D bias_index[ob_idx][of_idx] = GET_DATA_INDEX(BIAS, batch, skip_f + ob_idx, feature + of_idx * SIMD_SIZE, 0); @@ -238,7 +266,11 @@ KERNEL(fully_connected_gpu_imad)( #endif MAKE_VECTOR_TYPE(float, TILE_OFM) dequantized[TILE_BATCH]; - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; 
ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) @@ -252,7 +284,11 @@ KERNEL(fully_connected_gpu_imad)( } #else MAKE_VECTOR_TYPE(float, TILE_OFM) dequantized[TILE_BATCH]; - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) @@ -263,7 +299,11 @@ KERNEL(fully_connected_gpu_imad)( #endif #if HAS_FUSED_OPS - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) { @@ -282,7 +322,11 @@ KERNEL(fully_connected_gpu_imad)( } } #else - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp index 2a6ebbbd45ec32..a38a7af0c77cb0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp @@ -42,6 +42,7 @@ ParamsKey FullyConnectedKernelIMAD::GetSupportedKey() const { k.EnableTensorPitches(); k.EnableBatching(); k.EnableQuantization(QuantizationType::SYMMETRIC); + k.EnableDynamicShapesSupport(); return k; } @@ -57,23 +58,25 @@ DeviceFeaturesKey FullyConnectedKernelIMAD::get_required_device_features_key(con FullyConnectedKernelIMAD::Parent::DispatchData FullyConnectedKernelIMAD::SetDefault(const fully_connected_params& params, int) const { auto dispatchData = Parent::SetDefault(params); - auto tuning_data = GetTuningParams(params); - if (params.outputs[0].GetLayout() == DataLayout::bfyx) { - dispatchData.gws[0] = RoundUp(params.outputs[0].Y().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / - tuning_data.tile_ofm * tuning_data.slm_div_factor; - dispatchData.gws[1] = params.outputs[0].Batch().v; - dispatchData.gws[2] = params.outputs[0].Feature().v / tuning_data.tile_batch; - } else { - dispatchData.gws[0] = RoundUp(params.outputs[0].Feature().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / - tuning_data.tile_ofm * tuning_data.slm_div_factor; - dispatchData.gws[1] = params.outputs[0].Batch().v / tuning_data.tile_batch; - dispatchData.gws[2] = 1; - } + if (!params.has_dynamic_tensors()) { + auto tuning_data = GetTuningParams(params); + if (params.outputs[0].GetLayout() == DataLayout::bfyx) { + dispatchData.gws[0] = RoundUp(params.outputs[0].Y().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / + tuning_data.tile_ofm * 
tuning_data.slm_div_factor; + dispatchData.gws[1] = params.outputs[0].Batch().v; + dispatchData.gws[2] = params.outputs[0].Feature().v / tuning_data.tile_batch; + } else { + dispatchData.gws[0] = RoundUp(params.outputs[0].Feature().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / + tuning_data.tile_ofm * tuning_data.slm_div_factor; + dispatchData.gws[1] = params.outputs[0].Batch().v / tuning_data.tile_batch; + dispatchData.gws[2] = 1; + } - dispatchData.lws[0] = tuning_data.work_group_size; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; + dispatchData.lws[0] = tuning_data.work_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; + } return dispatchData; } @@ -86,6 +89,14 @@ bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_par const auto& fc_params = static_cast(params); const auto& in = fc_params.inputs[0]; const auto& wei = fc_params.weights; + auto out_l = fc_params.outputs[0].GetLayout(); + + // Dynamic kernel doesn't support dynamic weights + if (fc_params.is_shape_agnostic && in.is_dynamic()) { + if ((out_l == DataLayout::bfyx && in.Y().v == 0) || + (out_l == DataLayout::bf && in.Feature().v == 0)) + return false; + } if ((in.X().pad.before != 0) || (in.X().pad.after != 0) || (in.Y().pad.before != 0) || (in.Y().pad.after != 0)) { @@ -93,7 +104,6 @@ bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_par return false; } - auto out_l = fc_params.outputs[0].GetLayout(); if (out_l == DataLayout::bfyx) { // We don't support 4d output if (in.X().v > 1) @@ -147,25 +157,28 @@ FullyConnectedKernelIMAD::FullyConnectedTuningData FullyConnectedKernelIMAD::Get // In most cases SIMD8 works faster than SIMD16 tuning_data.sub_group_size = 8; - auto mk_size = if_num * ib_num; - auto mn_size = of_num * ob_num; + if (!params.is_shape_agnostic) { + auto mk_size = if_num * ib_num; + auto mn_size = of_num * ob_num; - // Known cases where simd16 works better than simd8 - bool simd16_is_faster = mk_size >= 1000 * 1024 && mn_size >= 1000 * 1024; - simd16_is_faster |= mk_size == 128 * 768 && mn_size == 128 * 3072; + // Known cases where simd16 works better than simd8 + bool simd16_is_faster = mk_size >= 1000 * 1024 && mn_size >= 1000 * 1024; + simd16_is_faster |= mk_size == 128 * 768 && mn_size == 128 * 3072; - // Some specific HW doesn't support SIMD8, force SIMD16 to respect this HW - // For other SIMD16 exceptions check that if_num is divided by 64 (SIMD16 * ISV4) because - // if there are leftovers then SIMD8 is more preferrable - if (!IsSIMDSizeSupported(params.engineInfo, 8) || (simd16_is_faster && if_num % 64 == 0)) { - tuning_data.sub_group_size = 16; + // Some specific HW doesn't support SIMD8, force SIMD16 to respect this HW + // For other SIMD16 exceptions check that if_num is divided by 64 (SIMD16 * ISV4) because + // if there are leftovers then SIMD8 is more preferrable + if (!IsSIMDSizeSupported(params.engineInfo, 8) || (simd16_is_faster && if_num % 64 == 0)) { + tuning_data.sub_group_size = 16; + } } - tuning_data.tile_ofm = 2; tuning_data.tile_batch = tuning_data.sub_group_size == 8 ? 
16 : 8; - while (tile_batch_max_size % tuning_data.tile_batch != 0) - tuning_data.tile_batch--; + if (!params.has_dynamic_tensors()) { + while (tile_batch_max_size % tuning_data.tile_batch != 0) + tuning_data.tile_batch--; + } size_t sub_group_pack_size = tuning_data.sub_group_size * tuning_data.pack_size; tuning_data.in_f_blocks_number = CeilDiv(if_num, sub_group_pack_size); diff --git a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp index e3722341a997c3..c63218e4ff21cb 100644 --- a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp @@ -2180,6 +2180,7 @@ struct dynamic_fully_connected_gpu : ::testing::TestWithParam; using dynamic_fully_connected_gpu_f16_3d = dynamic_fully_connected_gpu; +using dynamic_fully_connected_gpu_i8_3d = dynamic_fully_connected_gpu; static const std::vector dyn_batches_full = {1, 2, 4, 7, 8, 9, 15, 16, 31, 32, 33, 47, 48, 49, 58, 63, 64}; @@ -2194,6 +2195,10 @@ TEST_P(dynamic_fully_connected_gpu_f16_3d, basic) { run_test(); } +TEST_P(dynamic_fully_connected_gpu_i8_3d, basic) { + run_test(); +} + INSTANTIATE_TEST_SUITE_P( smoke, dynamic_fully_connected_gpu_f32_3d, @@ -2214,6 +2219,16 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(false, true)) ); +INSTANTIATE_TEST_SUITE_P( + smoke, + dynamic_fully_connected_gpu_i8_3d, + ::testing::Combine( + ::testing::Values(dyn_batches_smoke), + ::testing::Values(10, 32, 42, 53, 64, 128), + ::testing::Values(2, 9, 128), + ::testing::Values(false, true)) +); + INSTANTIATE_TEST_SUITE_P( full, dynamic_fully_connected_gpu_f32_3d, @@ -2233,3 +2248,13 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(2, 9, 16, 32, 64, 128), ::testing::Values(false, true)) ); + +INSTANTIATE_TEST_SUITE_P( + full, + dynamic_fully_connected_gpu_i8_3d, + ::testing::Combine( + ::testing::Values(dyn_batches_full), + ::testing::Values(10, 32, 42, 53, 64, 128), + ::testing::Values(2, 9, 16, 32, 64, 128), + ::testing::Values(false, true)) +); From f1c3356cfc00e45cf69153171fc381d34a25d131 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 22 Mar 2023 12:01:16 +0400 Subject: [PATCH 027/296] Small Plugin DG changes (#16432) --- docs/IE_PLUGIN_DG/Building.md | 3 ++- src/plugins/template/src/compiled_model.hpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/IE_PLUGIN_DG/Building.md b/docs/IE_PLUGIN_DG/Building.md index 9330d658a248f0..f40f2296e85122 100644 --- a/docs/IE_PLUGIN_DG/Building.md +++ b/docs/IE_PLUGIN_DG/Building.md @@ -30,6 +30,7 @@ Once the commands above are executed, the OpenVINO Developer Package is generate * `openvino::unitTestUtils` - static library with unit tests utilities * `openvino::ngraphFunctions` - static library with the set of `ov::Model` builders * `openvino::funcSharedTests` - static library with common functional tests + * `openvino::ngraph_reference` - static library with operation reference implementations. > **NOTE**: it's enough just to run `cmake --build . --target ov_dev_targets` command to build only targets from the > OpenVINO Developer package. @@ -61,7 +62,7 @@ $ cmake -DENABLE_FUNCTIONAL_TESTS=OFF -DOpenVINODeveloperPackage_DIR=../openvino - `src/CMakeLists.txt` to build a plugin shared library from sources: @snippet template/src/CMakeLists.txt cmake:plugin - > **NOTE**: `openvino::runtime` target is imported from the OpenVINO Developer Package. 
+ > **NOTE**: `openvino::...` targets are imported from the OpenVINO Developer Package. - `tests/functional/CMakeLists.txt` to build a set of functional plugin tests: @snippet template/tests/functional/CMakeLists.txt cmake:functional_tests diff --git a/src/plugins/template/src/compiled_model.hpp b/src/plugins/template/src/compiled_model.hpp index e8e908a3278979..911edf72abb515 100644 --- a/src/plugins/template/src/compiled_model.hpp +++ b/src/plugins/template/src/compiled_model.hpp @@ -37,7 +37,7 @@ class CompiledModel : public ov::ICompiledModel { void set_property(const ov::AnyMap& properties) override; - virtual ov::Any get_property(const std::string& name) const override; + ov::Any get_property(const std::string& name) const override; std::shared_ptr create_infer_request() const override; From 0070e8d9392490c48ec885c6fdf083ad50abdaba Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Wed, 22 Mar 2023 12:02:59 +0400 Subject: [PATCH 028/296] [TF FE] Fix problems with invalidation of decoders (#16464) * [TF FE] Fix problems with invalidation of decoders Signed-off-by: Kazantsev, Roman * Fix comment --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow/src/decoder_argdef.hpp | 19 ++++++++++++++++- .../tensorflow/src/decoder_proto.hpp | 21 ++++++++++++++++++- .../tensorflow/src/graph_iterator_proto.hpp | 10 +++++---- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/frontends/tensorflow/src/decoder_argdef.hpp b/src/frontends/tensorflow/src/decoder_argdef.hpp index 5b01025ee4df7a..a188a8a5cb8890 100644 --- a/src/frontends/tensorflow/src/decoder_argdef.hpp +++ b/src/frontends/tensorflow/src/decoder_argdef.hpp @@ -10,6 +10,8 @@ #include "openvino/frontend/tensorflow/decoder.hpp" namespace tensorflow { +class GraphDef; +class FunctionDef; class OpDef_ArgDef; } // namespace tensorflow @@ -19,14 +21,23 @@ namespace tensorflow { class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, const std::string& op_type) + explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def, + const std::string& op_type) : m_arg_def(arg_def), + m_graph_def(graph_def), + m_func_def(func_def), m_op_type(op_type) {} explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def, const std::string& op_type, const std::string& producer_name) : m_arg_def(arg_def), + m_graph_def(graph_def), + m_func_def(func_def), m_op_type(op_type), m_producer_name(producer_name) {} @@ -49,6 +60,12 @@ class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { private: const ::tensorflow::OpDef_ArgDef* m_arg_def; + // For existence of OpDef_ArgDef object corresponding to the main graph node, + // GraphDef object must live in the memory + const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + // For existence of OpDef_ArgDef object corresponding to the body graph node, + // both GraphDef and FunctionDef objects must be alive in the memory + const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; const std::string m_op_type; const std::string m_producer_name; }; diff --git a/src/frontends/tensorflow/src/decoder_proto.hpp b/src/frontends/tensorflow/src/decoder_proto.hpp index db1f113882451d..570fd7e7eebda5 100644 --- 
a/src/frontends/tensorflow/src/decoder_proto.hpp +++ b/src/frontends/tensorflow/src/decoder_proto.hpp @@ -12,6 +12,8 @@ #include "types.pb.h" namespace tensorflow { +class GraphDef; +class FunctionDef; class NodeDef; class AttrValue; } // namespace tensorflow @@ -29,7 +31,18 @@ void parse_producer_name(const std::string& producer_port_name, class DecoderProto : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderProto(const ::tensorflow::NodeDef* node_def) : m_node_def(node_def) {} + explicit DecoderProto(const ::tensorflow::NodeDef* node_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def) + : m_node_def(node_def), + m_graph_def(graph_def), + m_func_def(nullptr) {} + + explicit DecoderProto(const ::tensorflow::NodeDef* node_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def) + : m_node_def(node_def), + m_graph_def(graph_def), + m_func_def(func_def) {} ov::Any get_attribute(const std::string& name) const override; @@ -51,6 +64,12 @@ class DecoderProto : public ov::frontend::tensorflow::DecoderBase { private: std::vector<::tensorflow::AttrValue> decode_attribute_helper(const std::string& name) const; const ::tensorflow::NodeDef* m_node_def; + // For existence of NodeDef object corresponding to the main graph node, + // GraphDef object must live in the memory + const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + // For existence of NodeDef object corresponding to the body graph node, + // both GraphDef and FunctionDef objects must be alive in the memory + const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; }; } // namespace tensorflow } // namespace frontend diff --git a/src/frontends/tensorflow/src/graph_iterator_proto.hpp b/src/frontends/tensorflow/src/graph_iterator_proto.hpp index 1fa836e3b036e1..a5e2fc1ae6c0c0 100644 --- a/src/frontends/tensorflow/src/graph_iterator_proto.hpp +++ b/src/frontends/tensorflow/src/graph_iterator_proto.hpp @@ -45,12 +45,13 @@ class GraphIteratorProto : public GraphIterator { for (int input_ind = 0; input_ind < input_size; ++input_ind) { auto input_arg = &m_func_def->signature().input_arg(input_ind); m_input_names.push_back(input_arg->name()); - m_decoders.push_back(std::make_shared(input_arg, "input_arg")); + m_decoders.push_back(std::make_shared(input_arg, m_graph_def, m_func_def, "input_arg")); } // fill all node defs from library functions for (int node_ind = 0; node_ind < nodes_size; ++node_ind) { - m_decoders.push_back(std::make_shared(&(m_func_def->node_def(node_ind)))); + m_decoders.push_back( + std::make_shared(&(m_func_def->node_def(node_ind)), m_graph_def, m_func_def)); } // fill all outputs from library functions @@ -60,7 +61,8 @@ class GraphIteratorProto : public GraphIterator { auto output_arg = &m_func_def->signature().output_arg(output_ind); m_output_names.push_back(output_arg->name()); auto producer_name = ret_map.at(output_arg->name()); - m_decoders.push_back(std::make_shared(output_arg, "output_arg", producer_name)); + m_decoders.push_back( + std::make_shared(output_arg, m_graph_def, m_func_def, "output_arg", producer_name)); } } @@ -76,7 +78,7 @@ class GraphIteratorProto : public GraphIterator { auto nodes_size = m_graph_def->node_size(); m_decoders.resize(static_cast(nodes_size)); for (int node_ind = 0; node_ind < nodes_size; ++node_ind) { - m_decoders[node_ind] = std::make_shared(&m_graph_def->node(node_ind)); + m_decoders[node_ind] = std::make_shared(&m_graph_def->node(node_ind), m_graph_def); } // initialize a 
library map From c14e6ef48e4870a19b95519ff9ba65c6c86fcc4c Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Wed, 22 Mar 2023 17:08:10 +0900 Subject: [PATCH 029/296] [GPU] Use 4dim directly for onednn in gemm (#16182) * [GPU] Use 4-dim directly for onednn in gemm We were collapsing n-dim into 3d for onednn gemm, But it is not necessary, up to 4d. Signed-off-by: hyunback --- .../src/graph/impls/onednn/gemm_onednn.cpp | 10 ---------- .../intel_gpu/src/graph/impls/onednn/utils.cpp | 17 ++++++++++------- .../tests/fusions/gemm_fusion_test.cpp | 18 +++++++++++++++--- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 84bcdd83d2edf3..309a4e24285437 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -301,16 +301,6 @@ struct gemm_onednn : typed_primitive_onednn_impl { } static std::unique_ptr create(const gemm_node& arg, const kernel_impl_params& impl_params) { - bool full_tensor_or_per_tensor = true; - for (auto prim : arg.get_fused_primitives()) { - if (prim.input_layout.is_static() && prim.output_layout.is_static()) { - full_tensor_or_per_tensor &= - prim.input_layout.count() == prim.output_layout.count() || prim.input_layout.count() == 1; - } - } - if (!full_tensor_or_per_tensor) { - IE_THROW() << "Unimplemented: per channel binary post-operation is not supported for onednn gemm. Refer PR(#15353) message."; - } auto& engine = impl_params.prog->get_engine(); auto& config = impl_params.prog->get_config(); auto attr = arg.get_onednn_primitive_attributes(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index 6b217b196c922d..09e977b5edcda1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -68,22 +68,25 @@ dnnl::memory::dims convert_tensor(cldnn::tensor t, size_t dims, bool is_grouped) dnnl::memory::dims convert_gemm_tensor(cldnn::tensor t, size_t dims, bool batched_dims_can_be_removed) { auto sizes = t.sizes(default_fmt_for_dims(dims, false)); dnnl::memory::dims res(sizes.begin(), sizes.end()); - if (dims > 3) { - for (size_t i = 0; i < dims - 3; i++) { + if (dims > 4) { + for (size_t i = 0; i < dims - 4; i++) { res[i + 1] *= res[i]; } - res.erase(res.begin(), res.begin() + dims - 3); + res.erase(res.begin(), res.begin() + dims - 4); } - if (res.size() == 3 && batched_dims_can_be_removed) { + if (res.size() == 4 && batched_dims_can_be_removed) { res.erase(res.begin()); } return res; } dnnl::memory::format_tag convert_gemm_data_format(dnnl::memory::dims dims) { - if (dims.size() > 3) - throw std::runtime_error("[clDNN] Unsupported dims size for onednn gemm: should be <= 3"); - return dims.size() == 3 ? 
dnnl::memory::format_tag::abc : dnnl::memory::format_tag::ab; + switch (dims.size()) { + case 2: return dnnl::memory::format_tag::ab; + case 3: return dnnl::memory::format_tag::abc; + case 4: return dnnl::memory::format_tag::abcd; + default: throw std::invalid_argument("[clDNN] Unsupported conversion from "+ std::to_string(dims.size()) + " to onednn format_tag"); + } } diff --git a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp index 34b35f26c054ef..847c9192dd83b4 100644 --- a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp @@ -113,6 +113,9 @@ class GemmFusingTest : public ::BaseFusingTest { #define CASE_GEMM_2IN_FP16_3 { { 1, 1, 64, 64 }, { 1, 1, 64, 64 } }, { 1, 1, 64, 64 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx #define CASE_GEMM_2IN_FP16_4 { { 1, 2, 64, 128 }, { 1, 2, 256, 64 } }, { 1, 2, 256, 128 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx #define CASE_GEMM_2IN_FP16_5 { { 2, 3, 2, 2 }, { 2, 3, 2, 2 } }, { 2, 3, 2, 2 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx +#define CASE_GEMM_2IN_FP16_5D_1 { { 2, 3, 4, 6, 5 }, { 2, 3, 6, 4, 5 } }, { 2, 3, 6, 6, 5 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx +#define CASE_GEMM_2IN_FP16_6D_1 { { 2, 3, 7, 5, 3, 2 }, { 2, 3, 5, 7, 3, 2 } }, { 2, 3, 5, 5, 3, 2 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfwzyx, data_types::f16, format::bfwzyx + #define CASE_GEMM_2IN_U8U8_1 { { 1, 1, 2, 2 }, { 1, 1, 2, 2 } }, { 1, 1, 2, 2 }, tensor{ 1 }, tensor{ 0 }, data_types::u8, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx #define CASE_GEMM_2IN_U8U8_2 { { 1, 2, 64, 128 }, { 1, 2, 256, 64 } }, { 1, 2, 256, 128 }, tensor{ 1 }, tensor{ 0 }, data_types::u8, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx #define CASE_GEMM_2IN_U8U8_3 { { 1, 1, 16, 32 }, { 1, 1, 32, 16 } }, { 1, 1, 32, 32 }, tensor{ 1 }, tensor{ 0 }, data_types::u8, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx @@ -298,11 +301,14 @@ TEST_P(gemm_2in_add, eltwise_postop) { add_data_size.feature[0] = 1; add_data_layout.set_tensor(add_data_size); + auto in_layout0 = get_input_layout(p, 0); + auto in_layout1 = get_input_layout(p, 1); + create_topologies( - input_layout("input0", get_input_layout(p, 0)), - input_layout("input1", get_input_layout(p, 1)), + input_layout("input0", in_layout0), + input_layout("input1", in_layout1), data("add_data", get_mem(add_data_layout, 1.0f/p.kernel.count())), - gemm("gemm_prim", { input_info("input0"), input_info("input1") }, data_types::f32), + gemm("gemm_prim", { input_info("input0"), input_info("input1") }, data_types::f32, false, false, 1.f, 0.f, in_layout0.get_rank(), in_layout1.get_rank()), eltwise("add_prim", { input_info("gemm_prim"), input_info("add_data") }, p.eltwise_m, p.default_type), reorder("reorder_bfyx", input_info("add_prim"), p.default_format, data_types::f32) ); @@ -318,6 +324,12 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_add, ::testing::ValuesIn(std::vec gemm_test_params{ CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sum }, gemm_test_params{ 
CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::feature, eltwise_mode::prod }, gemm_test_params{ CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sub }, + gemm_test_params{ CASE_GEMM_2IN_FP16_5D_1, 3, 4, "", dim_vec_kind::batch, eltwise_mode::sum }, + gemm_test_params{ CASE_GEMM_2IN_FP16_5D_1, 3, 4, "", dim_vec_kind::batch, eltwise_mode::prod }, + gemm_test_params{ CASE_GEMM_2IN_FP16_5D_1, 3, 4, "", dim_vec_kind::batch, eltwise_mode::sub }, + gemm_test_params{ CASE_GEMM_2IN_FP16_6D_1, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sum }, + gemm_test_params{ CASE_GEMM_2IN_FP16_6D_1, 3, 4, "", dim_vec_kind::feature, eltwise_mode::prod }, + gemm_test_params{ CASE_GEMM_2IN_FP16_6D_1, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sub }, })); class gemm_2in_act_scale_quantize_i8 : public GemmFusingTest {}; From cbb25e94839c8fbb482e19e3754f149eda34c214 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:08:31 +0100 Subject: [PATCH 030/296] [DOCS] Proofreading developer documentation moved from wiki. (#15886) Minor stylistic and grammar corrections. Fixing links * Apply suggestions from code review Co-authored-by: Tatiana Savina --- src/common/snippets/README.md | 3 +- .../snippets/docs/snippets_cpu_target.md | 14 +- .../snippets/docs/snippets_design_guide.md | 108 +++++++------- src/plugins/hetero/README.md | 62 ++++---- src/plugins/intel_cpu/README.md | 13 +- src/plugins/intel_cpu/docs/cpu_emulation.md | 19 ++- .../intel_cpu/docs/debug_capabilities.md | 9 +- .../docs/internal_cpu_plugin_optimization.md | 3 +- .../docs/performance_analysis_ITT_counters.md | 18 ++- .../docs/runtime_parameters_cache.md | 26 +++- src/plugins/intel_gpu/README.md | 17 ++- .../intel_gpu/docs/basic_data_structures.md | 94 ++++++------ .../intel_gpu/docs/execution_of_inference.md | 20 +-- src/plugins/intel_gpu/docs/gpu_debug_utils.md | 140 +++++++++--------- src/plugins/intel_gpu/docs/gpu_kernels.md | 31 ++-- .../intel_gpu/docs/gpu_memory_formats.md | 23 +-- .../docs/gpu_plugin_driver_troubleshooting.md | 23 ++- .../intel_gpu/docs/gpu_plugin_ops_enabling.md | 82 +++++----- .../intel_gpu/docs/gpu_plugin_unit_test.md | 120 +++++++-------- .../docs/graph_optimization_passes.md | 29 ++-- .../docs/memory_allocation_gpu_plugin.md | 47 +++--- .../intel_gpu/docs/simplified_workflow.md | 5 +- .../intel_gpu/docs/source_code_structure.md | 21 +-- src/tests/README.md | 55 +++---- .../plugin/conformance/test_runner/README.md | 127 ++++++++-------- 25 files changed, 587 insertions(+), 522 deletions(-) diff --git a/src/common/snippets/README.md b/src/common/snippets/README.md index eca770a584cda2..8f9d55bc11714b 100644 --- a/src/common/snippets/README.md +++ b/src/common/snippets/README.md @@ -2,12 +2,13 @@ ## Key Contacts -Please contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group, for assistance regarding snippets. +For assistance regarding snippets, contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group. 
* [SnippetS design guide](./docs/snippets_design_guide.md) * [CPU target for SnippetS code generator](./docs/snippets_cpu_target.md) ## See also + * [OpenVINO™ README](../../../README.md) * [OpenVINO Core Components](../../README.md) * [Developer documentation](../../../docs/dev/index.md) \ No newline at end of file diff --git a/src/common/snippets/docs/snippets_cpu_target.md b/src/common/snippets/docs/snippets_cpu_target.md index 04b70f7df8708e..68f03202c4761d 100644 --- a/src/common/snippets/docs/snippets_cpu_target.md +++ b/src/common/snippets/docs/snippets_cpu_target.md @@ -1,12 +1,12 @@ -# CPU target for SnippetS code generator +# CPU Target for SnippetS Code Generator -Snippets in its first generation can be seen as a generalization over generic eltwise node. First generation of snippets has lack of integration with oneDNN and so patterns it supports should be kept orthogonal to what is fused with post-ops. +Snippets in its first generation can be seen as a generalization over a generic eltwise node. First generation of snippets does not have integration with oneDNN, and the patterns it supports should be kept orthogonal to what is fused with post-ops. -POC CPU implementation could be found [here](https://github.com/openvinotoolkit/openvino/pull/2824) +See the example of POC CPU implementation [here](https://github.com/openvinotoolkit/openvino/pull/2824). First 8 kernel parameters are passed by structure which is unpacked inside a kernel into the registers. The rest are passed through the stack. -Loop trip count should be placed to some GP register, as well as work amount. Moreover, we need to load all the parameters into GP registers. If we assume that we have enough registers than it can be done before the loop body. +The loop trip count should be placed to a GP register, as well as the work amount. Moreover, you need to load all the parameters into GP registers. If you assume that you have enough registers, then it can be done before the loop body. ``` auto param0 = abi_params[0]; @@ -18,9 +18,9 @@ auto work_amount = abi_params[3]; ## Memory operations -Load could be Vector, Scalar and Broadcast. Only native vector size for an architecture is supported (e.g. 16 on AVX-512) +A load could be Vector, Scalar, and Broadcast. Only the native vector size for an architecture is supported (for example, 16 on AVX-512). -Memory operation also generates post increments for the pointer it uses. +Memory operation also generates post increments for the pointer it uses. - `MemoryEmitter` - `StoreEmitter` @@ -50,8 +50,8 @@ Tensor data can be passed with strides. `Data` corresponds to a constant table and wraps this entity for the CPU. ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO SnippetS](../README.md) * [OpenVINO Core Components](../../../README.md) * [Developer documentation](../../../../docs/dev/index.md) - \ No newline at end of file diff --git a/src/common/snippets/docs/snippets_design_guide.md b/src/common/snippets/docs/snippets_design_guide.md index 01b005b20e4ec9..d495b35a3fc437 100644 --- a/src/common/snippets/docs/snippets_design_guide.md +++ b/src/common/snippets/docs/snippets_design_guide.md @@ -1,26 +1,26 @@ -# SnippetS design guide -This document describes the design and rationale for snippets code generator. Implementation of code functionality is located [here](https://github.com/openvinotoolkit/openvino/tree/master/src/common/snippets). Proposal for CPU backend integration is [here](https://github.com/openvinotoolkit/openvino/pull/2824). 
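As a minimal sketch of the parameter-passing scheme described in the CPU target document above: pointers are packed into a structure whose fields the generated kernel unpacks into GP registers, with the work amount serving as the loop trip count. All names here (`CallArgs`, `jit_kernel_t`, `run_snippet`) and the exact field layout are illustrative assumptions, not the plugin's actual ABI:

```cpp
#include <cstddef>

// Hypothetical call frame mirroring the abi_params snippet quoted above:
// abi_params[0..2] are assumed to be input/output pointers, and
// abi_params[3] is the work amount (loop trip count).
struct CallArgs {
    const float* param0;  // abi_params[0]
    const float* param1;  // abi_params[1]
    float* result;        // abi_params[2] (assumed output pointer)
    size_t work_amount;   // abi_params[3]
};

using jit_kernel_t = void (*)(const CallArgs*);

// The generated kernel reads the fields into registers once, before the
// loop body; the host side only forwards a pointer to the frame.
inline void run_snippet(jit_kernel_t kernel, const CallArgs& args) {
    kernel(&args);
}
```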
+# SnippetS Design Guide +This document describes the design and rationale for a snippets code generator. Implementation of code functionality is located [here](https://github.com/openvinotoolkit/openvino/tree/master/src/common/snippets). A proposal for CPU backend integration is [here](https://github.com/openvinotoolkit/openvino/pull/2824). ## Rationale -We believe that core **CNN operators (convolution, gemm, fully connected) are limited by compute, the rest is memory bound**. Math approximations (like transcendental functions) are rare in emerging workloads and could be treated with the same machinery. **Snippets are designed to optimize topology for memory**, while leaving compute intensive kernels for backend developers. +Core **CNN operators (convolution, gemm, fully connected) are limited by compute, the rest is memory bound**. Math approximations (like transcendental functions) are rare in emerging workloads and could be treated with the same machinery. **Snippets are designed to optimize topology for memory**, while leaving compute intensive kernels for backend developers. -We believe **potential speedup is proportional to shrink in memory-walked bytes**. So we can transform the problem to a task to optimize for memory walks, whatever pattern snippet has and operations it contains. Number of memory walks should be less or equal to handcrafted optimizations. This guarantees performance improvements over the previous approach (excluding corner cases caused by cache effects). *Shrinkage factor might be encoded to some cost function in future evolution of code generator*. Snippets generator provides diagnostics to estimate this shrinkage factor with `ngraph::snippets::op::Subgraph::print_statistics(bool verbose)` member. +The **potential speedup is proportional to shrink in memory-walked bytes**. Therefore, you can transform the problem to a task to optimize for memory walks, whatever pattern snippet has and operations it contains. The number of memory walks should be less or equal to handcrafted optimizations. This guarantees performance improvements over the previous approach (excluding corner cases caused by cache effects). *Shrinkage factor might be encoded to some cost function in future evolution of code generator*. Snippets generator provides diagnostics to estimate this shrinkage factor with `ngraph::snippets::op::Subgraph::print_statistics(bool verbose)` member. -We design SnippetS generator for back-end developers. The main purpose of inventing snippets code generator is an **operator fusion**, **register allocation** and **target kernel generation** decomposition. This allows modifications (like new fusion support) and feature extensions (like new operation support) to be done in a single point of modification and avoid combinatorial explosion for fusions/types/architectures etc. +The SnippetS generator is designed for back-end developers. The main purpose of inventing the snippets code generator is an **operator fusion**, **register allocation** and **target kernel generation** decomposition. This allows modifications (like new fusion support) and feature extensions (like new operation support) to be done in a single point of modification and avoid combinatorial explosion for fusions/types/architectures etc. -We believe that creating a full-fledged compiler or usage of existing compiler infrastructure (like LLVM & MLIR) is superfluous at this point of evelition. 
We aim to provide a **flexible and performant framework for operation fusions**, leaving micro optimizations (e.g. instruction scheduling) to the backend H/W. +Creating a full-fledged compiler or usage of existing compiler infrastructure (like LLVM & MLIR) is superfluous at this point of evolution. The aim is to provide a **flexible and performant framework for operation fusions**, leaving micro optimizations (for example, instruction scheduling) to the backend H/W. -We do not aim to invent a DSL for SnippetS and would like to keep it this way. DSL gives users more flexibility to express uncommon operations. However, the shift towards an approach to encode topologies with elementary operations followed by smart enough fusions is already expressive and performant enough. +There are no plans to invent a DSL for SnippetS. DSL gives users more flexibility to express uncommon operations. However, the shift towards an approach to encode topologies with elementary operations followed by smart enough fusions is already expressive and performant enough. -**Snippet** is a compiled compute **kernel** generated from a subgraph using SnippetS code generator for specific architecture with a **scheduling domain**. Using this scheduling domain and calling convention backend can execute generated compute kernels. For the first generation, snippets are **statically scheduled towards the output domain**. Multi-output snippets are supported if all outputs are broadcast-compatible in a sense that domains for all outputs can be broadcasted from one root domain which defines snippet schedule. It’s a subject of extension for future generations. +**Snippet** is a compiled compute **kernel** generated from a subgraph using the SnippetS code generator for a specific architecture with a **scheduling domain**. Using this scheduling domain and calling convention backend can execute generated compute kernels. For the first generation, snippets are **statically scheduled towards the output domain**. Multi-output snippets are supported if all outputs are broadcast-compatible in a sense that domains for all outputs can be broadcasted from one root domain that defines snippet schedule. It is a subject of extension for future generations. -We use nGraph as the highest level IR for subgraph representation and lowering transformations. **Opset1** is a base operation set for code generation. We aim to **keep the minimal possible and sufficient operation set** (or ISA) and keep it **RISC-like** (memory and compute decomposed). +nGraph is used as the highest level IR for subgraph representation and lowering transformations. **Opset1** is a base operation set for code generation. The aim is to **keep the minimal possible and sufficient operation set** (or ISA) and keep it **RISC-like** (memory and compute decomposed). -**One subgraph corresponds to one snippet**. Operations which cannot be scheduled by a single schedule should not be placed in the same subgraph. Snippet somewhat conceptually close to OpenCL kernel without a restriction to express only embarrassingly parallel tasks. +**One subgraph corresponds to one snippet**. Operations which cannot be scheduled by a single schedule should not be placed in the same subgraph. A snippet is somewhat conceptually close to OpenCL kernel without a restriction to express only embarrassingly parallel tasks. **Subgraph** once extracted from full topology IR is **treated as an operation and data flow descriptor in scalar notation** (similar to OpenCL/CUDA). 
Tensor sizes are used for defining scheduling domain and detecting broadcasts/reductions. -We split operations into 3 groups: **layout-oblivious (LOO), layout-aware(-tolerant) and layout-dependent**. **Layout-oblivious** operation semantics and implementation are completely agnostic to a specific layout in which tensors are placed in memory. For example, elements-wise math and ReLU does in this category. Implementation **layout-aware** operation depends on the layout of input/output tensors. For example, convolutions and other block-wise kernels or layout repaks. For **layout-specific** operation semantics and implementation depends on the layout. For example, the Yolo region. Patterns to fuse constructed in terms of taxonomy above. +Operations are split into 3 groups: **layout-oblivious (LOO), layout-aware(-tolerant) and layout-dependent(-specific)**. **Layout-oblivious** operation semantics and implementation are completely agnostic to a specific layout in which tensors are placed in memory. For example, like elements-wise math and ReLU in this category. Implementation of **layout-aware** operation depends on the layout of input/output tensors. For example, convolutions and other block-wise kernels or layout repacks. **Layout-specific** operation semantics and implementation depend on the layout. For example, the Yolo region. Patterns to fuse are constructed in terms of taxonomy above. ## Design @@ -28,19 +28,19 @@ Code generation is split into 2 phases, **tokenization** and **lowering**. ### Tokenization -Tokenization runs on full topology nGraph function inside a specific plugin in a stage of common transformations. Input of tokenization is a topology graph. Output is a modified topology graph with `ngraph::snippets::op::Subgraph` operations installed. Each subgraph contains nGraph function (called **body**) which holds a part of original topology legal for snippet generation (can be scheduled with a single schedule) +Tokenization runs on full topology nGraph function inside a specific plugin in a stage of common transformations. Input of tokenization is a topology graph. Output is a modified topology graph with `ngraph::snippets::op::Subgraph` operations installed. Each subgraph contains nGraph function (called **body**) which holds a part of original topology legal for snippet generation (can be scheduled with a single schedule). -Procedure of finding subgraphs suitable for code generation is called **tokenization**, meaning that we split the topology tree into subgraphs in the same greedy approach which is used for parsing input stream of characters into the tokens. It also could be seen as and modified into a basic block construction problem, since we also find a leader and potentially terminators. Implementation can be found [here](https://github.com/openvinotoolkit/openvino/blob/master/src/common/snippets/src/pass/collapse_subgraph.cpp). +A procedure of finding subgraphs suitable for code generation is called **tokenization**. During tokenization the topology tree is split into subgraphs in the same greedy approach which is used for parsing input stream of characters into the tokens. It may also be seen as and modified into a basic block construction problem, since there is a leader and potentially terminators. See the example of implementation [here](https://github.com/openvinotoolkit/openvino/blob/master/src/common/snippets/src/pass/collapse_subgraph.cpp). 
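For intuition only, a deliberately simplified sketch of such a greedy pass follows; it is not the actual `collapse_subgraph.cpp` logic, and `can_merge` stands in for the real legality checks described later in this document (acyclicity, input/output limits, broadcast-compatible outputs):

```cpp
#include <vector>

// Toy operation record; the real pass works on nGraph nodes.
struct Op {
    bool fusible = false;  // e.g. a layout-oblivious elementwise operation
};

// Placeholder for the real legality checks from this document.
static bool can_merge(const std::vector<const Op*>& subgraph, const Op* op) {
    return op->fusible;
}

// Greedy tokenization: scan operations in topological order, grow the
// current subgraph while merging stays legal, otherwise close the token
// and continue looking for a new leader.
std::vector<std::vector<const Op*>> tokenize(const std::vector<Op>& topo_order) {
    std::vector<std::vector<const Op*>> subgraphs;
    std::vector<const Op*> current;
    for (const Op& op : topo_order) {
        if (can_merge(current, &op)) {
            current.push_back(&op);
        } else if (!current.empty()) {
            subgraphs.push_back(current);
            current.clear();
        }
    }
    if (!current.empty())
        subgraphs.push_back(current);
    return subgraphs;
}
```

The greedy structure is the point: any operation type may join the current token as long as the merge stays legal, which is what distinguishes tokenization from fixed-shape pattern matching.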
-Tokenization has an advantage over the pattern matching approach (used in traditional and MLIR-based compilers) since it can handle arbitrary patterns of operations. Pattern matching deduces specific configuration of operations to translate to another one, more suitable for target machine or further lowering. This means that relations between operations are fixed. Tokenization on the other hand has the only limitation on specific operation types which are **suitable and profitable** to fuse with respect to original topology correctness (keeping it as a direct acyclic graph). +Tokenization has an advantage over the pattern matching approach (used in traditional and MLIR-based compilers) since it can handle arbitrary patterns of operations. Pattern matching deduces specific configuration of operations to translate to another one, more suitable for target machine or further lowering. This means that relations between operations are fixed. Tokenization, on the other hand, has the only limitation on specific operation types which are **suitable and profitable** to fuse, respecting original topology correctness (keeping it as a direct acyclic graph). -The extracted body comes to a plug-in wrapped as a composite `Subgraph` operation which is seen as a block box from a plugin standpoint and can participate in any plugin specific subroutines (e.g. layout assignment, memory allocation, etc.). +The extracted body comes to a plug-in wrapped as a composite `Subgraph` operation which is seen as a block box from a plugin standpoint and can participate in any plugin specific subroutines (for example, layout assignment, memory allocation, etc.). ### Supported subgraph patterns -Subgraph accepts arbitrary numbers of inputs and outputs. There is 1:1 mapping for external (subgraph node’s) and internal (body) parameters indexes. +Subgraph accepts arbitrary numbers of inputs and outputs. There is 1:1 mapping for external (subgraph node’s) and internal (body) parameters indexes. -Pattern here is an exact subgraph configuration (nodes and edges between them). **The first generation of snippets supports only layout-oblivious operations which may have broadcast on inputs and broadcast-compatible outputs**. For example Shapes `<1, 42, 17, 31>`, `<1, 42, 17, 1>` and `<1, 42, 1, 31>` are considered as broadcast-compatible. Layout-oblivious operation with multiple outputs as a snippet leader and forms a new subgraph. The most beneficial patterns are subgraphs with complex control flow but minimal number of inputs/and outputs. For example, GeLU has a 5x shrinkage factor from original unfused subgraph in number of bytes walked. Subgraph below could be considered as an example of such a subgraph. Leader detection procedure aims to find such subgraphs. +Pattern here is an exact subgraph configuration (nodes and edges between them). **The first generation of snippets supports only layout-oblivious operations which may have broadcast on inputs and broadcast-compatible outputs**. For example Shapes `<1, 42, 17, 31>`, `<1, 42, 17, 1>` and `<1, 42, 1, 31>` are considered as broadcast-compatible. Layout-oblivious operation with multiple outputs serves as a snippet leader and forms a new subgraph. The most beneficial patterns are subgraphs with complex control flow but minimal number of inputs/and outputs. For example, GeLU has a 5x shrinkage factor from original unfused subgraph in number of bytes walked. Subgraph below could be considered as an example of such a subgraph. Leader detection procedure aims to find such subgraphs. 
```mermaid flowchart LR @@ -60,12 +60,12 @@ class nodeA3 steel1 ``` Operations are greedily added to the subgraph until -1. New operation doesn’t introduce a loop in a topology function. +1. New operation does not introduce a loop in a topology function. 1. Number of inputs and outputs satisfies target criteria. 1. Operation is not a predecessor of topology output. -1. Resulting subgraph can be scheduled (all outputs are broadcast-compatible). +1. Resulting subgraph can be scheduled (all outputs are broadcast-compatible). -If a potential subgraph doesn’t meet any of criteria above, the procedure continues to find a new leader. +If a potential subgraph does not meet any of the criteria above, the procedure continues to find a new leader. ### Lowering @@ -82,27 +82,27 @@ Lowering is a sequence of subgraph (snippet body) traversal passes to generate a #### Common optimizations -Constants are treated as inputs for a subgraph with an exception for scalar cases (since we don’t need to schedule them). `snippets::op::Scalar` is used to represent this kind of constants. +Constants are treated as inputs for a subgraph with an exception for scalar cases (since they do not need to be scheduled). `snippets::op::Scalar` is used to represent this kind of constants. -If such Scalar comes as a second input of Power operation, it’s replaced with `snippets::op::PowerStatic`. +If such Scalar comes as a second input of Power operation, it is replaced with `snippets::op::PowerStatic`. #### Canonicalization -The goal of this step is to apply target independent and schedule related optimizations and to make snippet **schedulable**. +The goal of this step is to apply target-independent and schedule-related optimizations and to make a snippet **schedulable**. ##### Domain normalization All input and output shapes are normalized to 6D for future schedule generation. If shape propagation fails or leads to inconsistent output shapes an exception is raised. -Layout assigned by user code and passed to a `generate` function is propagated through subgraph on this step as well. Layout is passed to a generate function as a `BlockedShapeVector` which is a `std::vector` , while `BlockedShape` is `std::tuple`. For example, if backend supports `NCHW16c` layout and tensor has size of `<1, 42, 17, 31>` and hold single precision floating point this structure should be `std::make_tuple(ngraph::Shape {1, 3, 17, 31, 16}, ngraph::AxisVector {0, 1, 2, 3, 1}, ngraph::element::f32);`. This allows generic layout representation. +The layout assigned by a user code and passed to a `generate` function is propagated through a subgraph on this step as well. The layout is passed to a `generate` function as a `BlockedShapeVector` which is a `std::vector` , while `BlockedShape` is `std::tuple`. For example, if backend supports `NCHW16c` layout and a tensor has a size of `<1, 42, 17, 31>` and holds single precision floating point, this structure should be `std::make_tuple(ngraph::Shape {1, 3, 17, 31, 16}, ngraph::AxisVector {0, 1, 2, 3, 1}, ngraph::element::f32);`. This allows generic layout representation. ##### Dialect conversion -The goal for this step is to transform a subgraph (body function) into a form possible to code generation. Input for this step is subgraph in a canonical form output is a subgraph in snippets dialect. +The goal for this step is to transform a subgraph (body function) into a form possible for code generation. Input for this step is a subgraph in a canonical form. Output is a subgraph in snippets dialect. 
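Before the dialect-conversion walkthrough, a self-contained sketch of the `NCHW16c` layout descriptor from the domain normalization step above. The stand-in types and the element types of the `BlockedShape` tuple are assumptions here, inferred from the quoted `std::make_tuple` call rather than taken from the real headers:

```cpp
#include <cstddef>
#include <tuple>
#include <vector>

// Stand-ins for the real nGraph types; only the structure matters here.
struct Shape      { std::vector<size_t> dims; };
struct AxisVector { std::vector<size_t> axes; };
enum class ElementType { f32 };

// Assumed shape of the descriptor: (blocked shape, axis order, element type).
using BlockedShape       = std::tuple<Shape, AxisVector, ElementType>;
using BlockedShapeVector = std::vector<BlockedShape>;

// <1, 42, 17, 31> in NCHW16c: the 42 channels are split into 3 blocks of 16
// (with padding), and axis 1 appears twice to mark the blocked dimension.
const BlockedShape nchw16c{Shape{{1, 3, 17, 31, 16}},
                           AxisVector{{0, 1, 2, 3, 1}},
                           ElementType::f32};
```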
-Snippet or kernel is formed around the subgraph body in a sequence of traversal steps. Let’s walk through these steps with the smallest possible subgraph which contains out of single `[Add]` operation.
+A snippet or a kernel is formed around the subgraph body in a sequence of traversal steps. Let us walk through these steps with the smallest possible subgraph which contains a single `[Add]` operation.
 
-While we extract subgraphs with the tokenization part we explicitly insert Parameters and Results to its body to form a complete nGraph Function.
+When subgraphs are extracted with the tokenization part, Parameters and Results are explicitly inserted into its body to form a complete nGraph Function.
 
```mermaid
flowchart LR
@@ -118,11 +118,11 @@ class nodeA8 steel1
class nodeA1,nodeA3 steel1
```
 
-This function represents operation dependencies in scalar (similar to OpenCL) notation while shapes of tensors are used to generate schedules. At this point kernel-schedule decomposition is made (similar to Halide/OpenCL/TVM)
+This function represents operation dependencies in scalar (similar to OpenCL) notation, while shapes of tensors are used to generate schedules. At this point, kernel-schedule decomposition is made (similar to Halide/OpenCL/TVM).
 
 ###### Explicit memory operations
 
-As a next step explicit memory operations are placed for each input and output. `InsertLoad` and `InsertStore` passes derived from `MatcherPass`.
+As a next step, explicit memory operations are placed for each input and output. The `InsertLoad` and `InsertStore` passes derive from `MatcherPass`.
 
```mermaid
flowchart LR
@@ -142,16 +142,16 @@ class nodeA8 carbon1
class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```
 
-By default, memory operations assumes vector memory access, if scalar access is needed special passes `ReplaceLoadsWithScalarLoads` and `ReplaceStoresWithScalarStores` should be executed.
+By default, memory operations assume vector memory access. If scalar access is needed, the special `ReplaceLoadsWithScalarLoads` and `ReplaceStoresWithScalarStores` passes should be executed.
 
 ###### Explicit broadcast
 
-For each operation in body function inputs are checked against broadcasting. In case of parameters to be broadcasted explicit broadcast operation is generated. For example, if for the subgraph above we have `<1, 42, 17, 31>` and `<1, 42, 17, 1>` resulting subgraph is going to be
+For each operation in the body function, inputs are checked for broadcasting. When Parameters are to be broadcasted, an explicit broadcast operation is generated. For example, with `<1, 42, 17, 31>` and `<1, 42, 17, 1>` for the subgraph above, the resulting subgraph will be:
 
```mermaid
flowchart LR
-    nodeA1("Parameter\n<1, 42, 17, 1>") --> node6("Load\n<1, 42, 17, 1>")
-    node6("Load\n<1, 42, 17, 1>") --> nodeA9("BroadcastMove\n<1, 42, 17, 31>")
+    nodeA1("Parameter\n<1, 42, 17, 1>") --> nodeA6("Load\n<1, 42, 17, 1>")
+    nodeA6("Load\n<1, 42, 17, 1>") --> nodeA9("BroadcastMove\n<1, 42, 17, 31>")
    nodeA9("BroadcastMove\n<1, 42, 17, 31>") --> nodeA2(Add)
    nodeA3("Parameter\n<1, 42, 17, 31>") --> nodeA7("Load\n<1, 42, 17, 31>")
    nodeA7("Load\n<1, 42, 17, 31>") ---> nodeA2(Add)
@@ -164,10 +164,10 @@ classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class nodeA2 daisy1
class nodeA5 moss1
class nodeA8,nodeA9 carbon1
-class nodeA1,nodeA3,node6,nodeA7 steel1
+class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```
 
-If load followed by broadcast is detected then this pair is replaced by a single Broadcast load instruction. 
Like the following
+If Load followed by Broadcast is detected, then this pair is replaced by a single BroadcastLoad instruction:
 
```mermaid
flowchart LR
@@ -187,7 +187,7 @@ class nodeA8 carbon1
 class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```
 
-Broadcast and regular streaming vector load is possible from the same pointer. Broadcast load should always go before streaming load. Broadcast load for non the most varying dimension is not generated, however it affects the generated schedule.
+Broadcast and regular streaming vector loads are possible from the same pointer. BroadcastLoad should always go before a streaming load. BroadcastLoad for a dimension other than the most varying one is not generated; however, it affects the generated schedule.
 
 #### Target-specific optimizations
 
@@ -197,13 +197,13 @@ Target developers can plug in to the code generation pipeline some specific opti
 
 #### Register allocation
 
-Canonicalized subgraph in a snippets dialect forms a basic block or region inside a snippet (kernel). Registers are allocated globally for the whole subgraph. Since all operations for a subgraph are assumed to be vector, only vector registers are allocated for the first generation of SnippetS. Linear scan register allocation algorithm is used. Register allocator is implemented as a function pass `ngraph::snippets::pass::AssignRegisters` and store allocated registers for each node into `rt_info`. `rt_info` for a node holds a register for Node's output. *However, this part should be refactored batter, either to become target independent or use target specific abstraction to acquire a new register*
+A canonicalized subgraph in a snippets dialect forms a basic block or region inside a snippet (kernel). Registers are allocated globally for the whole subgraph. Since all operations for a subgraph are assumed to be vector, only vector registers are allocated for the first generation of SnippetS. The linear scan register allocation algorithm is used. The register allocator is implemented as the `ngraph::snippets::pass::AssignRegisters` function pass and stores allocated registers for each node into `rt_info`. `rt_info` for a node holds a register for the Node's output. *However, this part should be refactored better, either to become target independent or to use target-specific abstraction to acquire a new register.*
 
-#### Schedule generation 
+#### Schedule generation
 
-The goal of this step is to transform subgraphs in a scalar notation into kernel functions callable from user code. `Kernel` and `Tile` operations are introduced for this purpose. Each of this operation has a constructor from code region described as a collection of operation and operands pairs `Kernel(const std::vector, ngraph::snippets::RegInfo>>& region);`.
+The goal of this step is to transform subgraphs in a scalar notation into kernel functions callable from user code. The `Kernel` and `Tile` operations are introduced for this purpose. Each of these operations has a constructor from a code region described as a collection of operation and operand pairs: `Kernel(const std::vector<std::pair<std::shared_ptr<ngraph::Node>, ngraph::snippets::RegInfo>>& region);`.
 
-If we return to example above this comes to a following hierarchical IR. If we limit scope to layout oblivious operations with broadcasting support, tile could be generated as a single loop over the most warning dimension. The second `Tile` is generated to handle tails and can be omitted if not needed. Special pass replaces memory operations on vector to scalar versions for tail subgraph.
+Returning to the example above, this comes down to the following hierarchical IR. 
If the scope is limited to layout-oblivious operations with broadcasting support, `Tile` could be generated as a single loop over the most varying dimension. The second `Tile` is generated to handle tails and can be omitted if not needed. A special pass replaces vector memory operations with scalar versions for the tail subgraph.
 
```mermaid
graph LR
@@ -244,13 +244,13 @@ class nodeD1 no-stroke
```
 
 Where
-* `Kernel` constants a collection of the tiles, corresponds to a Subgraph node and responsible for function signature generation, calls generators for all tiles and data sections
-* `Tile` contains single subgraph body, vector or scalar
-* `Data` corresponds to data section aggregated for all nodes in all Tile’s subgraphs
+* `Kernel` is a collection of the tiles, corresponds to a Subgraph node, and is responsible for function signature generation. It calls generators for all tiles and data sections.
+* `Tile` contains a single subgraph body, either vector or scalar.
+* `Data` corresponds to the data section aggregated for all nodes in all Tile’s subgraphs.
 
 #### Target code emission
 
-Target code emission is table based. Target is responsible for filling `jitters` table field in `Generator` class.
+Target code emission is table-based. A target is responsible for filling the `jitters` table field in the `Generator` class.
 
```
std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>> jitters;
@@ -260,9 +260,9 @@ std::map(
 
 An OpenVINO plugin is treated as a target for snippets.
 
-Each nGraph node is mapped to a convertor function which creates `Emitter` form this node. Each specific emitter should extend from `Emitter`. It is used to map this node to target code and has `emit_code` and `emit_data` methods. `emit_data` is used during data section generation. All operations from snippets dialect which are legal for code generation should be expressed as operations derived from nGraph Op as well as Emitter derived snippets::Emitter class which knows how to translate this Op to Target specific ISA. (ex. xbyak is a jit backend for CPU plugin).
+Each nGraph node is mapped to a converter function which creates an `Emitter` from the node. Each specific emitter should extend from `Emitter`. It is used to map the node to the target code and has `emit_code` and `emit_data` methods. The `emit_data` is used during data section generation. All operations from the snippets dialect which are legal for code generation should be expressed as operations derived from nGraph Op, as well as an `Emitter`-derived `snippets::Emitter` class which knows how to translate this Op to target-specific ISA (for example, xbyak is a JIT backend for the CPU plugin).
 
-For minimal code generator support target should provide emitters for the following operations
+For minimal code generator support, a target should provide emitters for the following operations:
 
 * `Kernel`
 * `Tile`
@@ -273,29 +273,29 @@ For minimal code generator support target should provide emitters for the follow
 * `Store`
 * `ScalarStore`
 
-Once a schedule is generated, target code is emitted from a kernel in Generator::generate method by executing Kernel::emit_code function. Since Kernel and Tile represents hierarchical
+Once a schedule is generated, target code is emitted from a kernel in the `Generator::generate` method by executing the `Kernel::emit_code` function, since `Kernel` and `Tile` represent hierarchical
IR.
 
 ##### Dialect extensibility
 
-Target can potentially extend snippets dialect with target specific operation for code emission. 
It should implement:
+A target can potentially extend the snippets dialect with a target-specific operation for code emission. It should implement:
 
-* nGraph operation (ex. `class FMA : public ngraph::op::Op`)
-* Emitter for this operation (ex. `class FmaEmitter : public Emitter` )
-* register this pair in `jitters` map
+* an nGraph operation (for example, `class FMA : public ngraph::op::Op`)
+* an Emitter for the operation (for example, `class FmaEmitter : public Emitter`)
+* registration of the pair in the `jitters` map
 
 ### Calling convention
 
-Parameters for a generated snippet are split into schedule-invariant and schedule-dependent. Schedule-invariant parameters include pointers to input/output tensors and strides for each of them with the same rank as scheduling domain.
+Parameters for a generated snippet are split into schedule-invariant and schedule-dependent ones. Schedule-invariant parameters include pointers to input/output tensors and strides for each of them with the same rank as the scheduling domain.
 
 ### Diagnostics
 
 #### Reference mode
 
-Subgraph can be executed with nGraph references if no generator is present.
+A subgraph can be executed with nGraph references if no generator is present.
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO SnippetS](../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [Developer documentation](../../../../docs/dev/index.md)
-
diff --git a/src/plugins/hetero/README.md b/src/plugins/hetero/README.md
index 952013815bc154..0935adc0f363a1 100644
--- a/src/plugins/hetero/README.md
+++ b/src/plugins/hetero/README.md
@@ -1,4 +1,4 @@
-# OpenVINO Hetero plugin design overview
+# OpenVINO Hetero Plugin Design Overview
 
 ## Subgraphs selection
 
@@ -6,17 +6,17 @@ Algorithm:
 
 For each plugin
 1. Select *root* node
-    * Node not in subgraph previously constructed
-    * Affinity is equal to plugin name
-2. Select adjacent node to any node in already subgraph which is not in rejected list
-    * if there are no such nodes **end**
-3. Check selected node has same affinity
-4. Add node to subgraph if check was successful or add to rejected list otherwise
-5. Check global condition
-    * Nodes in rejected list can never be added to subgraph
-    * Nodes not in subgraph and not in rejected list can possibly be added later
-    * Check subgraph topology (the only check now is there are no indirect subgraph self-references)
-6. If global condition was failed remove last node from subgraph, add it to rejected list and go to step 5
+    * A node not in a previously constructed subgraph
+    * Affinity is equal to the plugin name
+2. Select a node adjacent to any node already in the subgraph and not on the *rejected* list
+    * If there are no such nodes, **end**
+3. Verify that the selected node has the same affinity
+4. Add the node to the subgraph if the check has been successful; otherwise, add it to the *rejected* list
+5. Check the global condition
+    * Nodes on the *rejected* list can never be added to a subgraph
+    * Nodes not in a subgraph and not on the *rejected* list can possibly be added later
+    * Check the subgraph topology (the only check now is whether there are no indirect subgraph self-references)
+6. If the global condition has failed, remove the last node from the subgraph. Add it to the *rejected* list and go to step 5.
    * we can roll back multiple times here because the *rejected* list changes every time
 7. 
Go to step 2
 
@@ -32,7 +32,7 @@ graph TD;
 6-->7;
 ```
 
-Nodes [1,2,3,5,6,7] are supported in plugin, [4] is not
+Nodes [1,2,3,5,6,7] are supported in the plugin, [4] is not.
 
 Possible roots: [1,2,3,5,6,7]
 1. Select root [1]
@@ -50,27 +50,27 @@ Possible roots: [1,2,3,5,6,7]
 4. Merge [5]
    * Subgraph: [1,2,3,5]
    * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 5. Merge [6]
    * Subgraph: [1,2,3,5,6]
    * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 6. Merge [7]
    * Subgraph: [1,2,3,5,6,7]
   * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 7. Failed to merge [4]
    * Subgraph: [1,2,3,5,6,7]
    * Rejected: [4]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 8. Rollback [7]
    * Subgraph: [1,2,3,5,6]
    * Rejected: [4,7]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 9. Rollback [6]
    * Subgraph: [1,2,3,5]
    * Rejected: [4,6,7]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 10. Rollback [5]
    * Subgraph: [1,2,3]
    * Rejected: [4,5,6,7]
@@ -97,11 +97,11 @@ Possible roots: [5,6,7]
 5. Merge [2]
    * Subgraph: [2,3,5,6,7]
    * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 6. Failed to merge [4]
    * Subgraph: [2,3,5,6,7]
    * Rejected: [4]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 7. Rollback [2]
    * Subgraph: [3,5,6,7]
    * Rejected: [2,4]
@@ -113,7 +113,7 @@ Possible roots: [] no roots, **END**
 Subgraphs: [1,2,3], [3,5,6,7]
 
 Select best subgraph:
-* When we have multiple subgraphs larger ([3,5,6,7]) is always selected, always
+* When there are multiple subgraphs, the larger one ([3,5,6,7]) is **always** selected.
 
 Repeat previous steps with remaining nodes [1,2]
 
@@ -124,18 +124,18 @@ The final result is:
 
 ## Subgraphs self reference detection
 
-1. For each node in network build a list of reachable node (transitive closure)
-2. For each pair of nodes in subgraph find `path` nodes (nodes through one node in pair reachable to other)
-   * assume `src` - one node in pair, `dst` - other node in pair
-   * get all nodes reachable from `src`
-   * in those nodes find nodes through you can reach `dst` those will be our `path` node
-3. Results for pairs is cached.
-4. Check if there intersection between `path` nodes set and rejected nodes set for each nodes pair in subgraph
-5. In case of intersection we have a self-reference and subgraph is invalid
+1. For each node in a network, build a list of reachable nodes (transitive closure).
+2. For each pair of nodes in a subgraph, find `path` nodes (nodes through which one node of the pair is reachable from the other). 
+   * assume `src` - one node in the pair, `dst` - the other node in the pair
+   * get all reachable nodes from `src`
+   * among those nodes, find the nodes through which you can reach `dst`. These will be the `path` nodes.
+3. Results for pairs are cached.
+4. Check whether there is an intersection between the `path` node set and the rejected node set for each pair of nodes in a subgraph.
+5. If there is an intersection, a self-reference occurs and the subgraph is invalid.
 
 ## See also
+
 * [OpenVINO™ README](../../../README.md)
 * [OpenVINO Core Components](../../README.md)
 * [OpenVINO Plugins](../README.md)
 * [Developer documentation](../../../docs/dev/index.md)
-
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/README.md b/src/plugins/intel_cpu/README.md
index f7afe70ab1520f..87530644ebe748 100644
--- a/src/plugins/intel_cpu/README.md
+++ b/src/plugins/intel_cpu/README.md
@@ -2,17 +2,17 @@
 
 ## Key Contacts
 
-Please contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group, for assistance regarding CPU.
+For assistance regarding CPU, contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group.
 
 ## Components
 
 CPU Plugin contains the following components:
 
-* [docs](./docs/) - contains developer documentation pages for the component.
-* [src](./src/) - folder contains sources of the core component.
-* [tests](./tests/) - contains tests for OpenVINO Plugin components.
-* [thirdparty](./thirdparty/) - contains third-party modules.
-* [tools](./tools/) - contains tools and helpers for OpenVINO Plugin components.
+* [docs](./docs/) - developer documentation pages for the component.
+* [src](./src/) - sources of the core component.
+* [tests](./tests/) - tests for OpenVINO Plugin components.
+* [thirdparty](./thirdparty/) - third-party modules.
+* [tools](./tools/) - tools and helpers for OpenVINO Plugin components.
 
 ## Tutorials
 
@@ -23,6 +23,7 @@ CPU Plugin contains the following components:
 * [Internal CPU Plugin Optimizations](./docs/internal_cpu_plugin_optimization.md)
 
 ## See also
+
 * [OpenVINO™ README](../../../README.md)
 * [OpenVINO Core Components](../../README.md)
 * [OpenVINO Plugins](../README.md)
diff --git a/src/plugins/intel_cpu/docs/cpu_emulation.md b/src/plugins/intel_cpu/docs/cpu_emulation.md
index d431eda5d0d47d..6b11116e8c1296 100644
--- a/src/plugins/intel_cpu/docs/cpu_emulation.md
+++ b/src/plugins/intel_cpu/docs/cpu_emulation.md
@@ -2,9 +2,9 @@
 
 Intel SDE can be used for emulating CPU architecture, checking for AVX/SSE transitions, bad pointers and data misalignment, etc.
 
-Also supports debugging within emulation.
+It also supports debugging within emulation.
 
-In general the tool can be used for all kind of troubleshooting activities except performance analysis.
+In general, the tool can be used for all kinds of troubleshooting activities except performance analysis. 
See [Documentation](https://www.intel.com/content/www/us/en/developer/articles/tool/software-development-emulator.html) for more information.
 
@@ -19,17 +19,24 @@ OV_CPU_BLOB_DUMP_FORMAT=TEXT OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution \
 
 - Running _cpuFuncTests_ on some old architecture, for example Sandy Bridge:
 
-`/path/to/sde -snd -- ./cpuFuncTests`
+```sh
+/path/to/sde -snb -- ./cpuFuncTests
+```
 
 - Count AVX/SSE transitions for the current host:
 
-`/path/to/sde -ast -- ./benchmark_app -m path/to/model.xml`
+```sh
+/path/to/sde -ast -- ./benchmark_app -m path/to/model.xml
+```
 
-> **NOTE**: Best way to check for AVX/SSE transitions is to run within Alder Lake emulation:
+> **NOTE**: The best way to check for AVX/SSE transitions is to run within Alder Lake emulation:
 
-`/path/to/sde -adl -- ./benchmark_app -m path/to/model.xml`
+```sh
+/path/to/sde -adl -- ./benchmark_app -m path/to/model.xml
+```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_cpu/docs/debug_capabilities.md b/src/plugins/intel_cpu/docs/debug_capabilities.md
index 6ae506fb4f6968..bffa9aff5fd9b6 100644
--- a/src/plugins/intel_cpu/docs/debug_capabilities.md
+++ b/src/plugins/intel_cpu/docs/debug_capabilities.md
@@ -1,19 +1,20 @@
-# CPU Plugin debug capabilities
+# CPU Plugin Debug Capabilities
 
-The page describes list of useful debug features, controlled by environment variables.
+The page describes a list of useful debug features, controlled by environment variables.
 They can be activated at runtime and might be used for analyzing issues, getting more context, comparing execution results, etc.
 
-To have CPU debug capabilities available at runtime the following CMake option should be used when building the plugin:
+To have CPU debug capabilities available at runtime, use the following CMake option when building the plugin:
* `ENABLE_DEBUG_CAPS`. 
Default is `OFF` -The following debug capabilities are available with the latest openvino: +The following debug capabilities are available with the latest OpenVINO: - [Verbose mode](../src/docs/verbose.md) - [Blob dumping](../src/docs/blob_dumping.md) - [Graph serialization](../src/docs/graph_serialization.md) ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md b/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md index 377792a6dc9ec1..169e6eab2255e8 100644 --- a/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md +++ b/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md @@ -135,7 +135,7 @@ class nodeB2,nodeB3,nodeB5,nodeB6,nodeB7,nodeB9 steel1 ``` ## Fusing Convolution and Sum Layers -A combination of convolution, simple, and Eltwise layers with the sum operation results in a single layer called *Convolution*: +A combination of convolution, simple, and Eltwise layers with the sum operation results in a single layer called *Convolution*: ```mermaid flowchart TD @@ -216,6 +216,7 @@ CPU plugin removes a Power layer from a topology if it has the following paramet - offset = 0 ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md b/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md index 263b043dd12d23..1cb302b9ab2f9f 100644 --- a/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md +++ b/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md @@ -1,4 +1,4 @@ -# Performance analysis using ITT counters +# Performance Analysis Using ITT Counters ## Contents @@ -21,8 +21,11 @@ For performance analysis, follow the steps below: ### Intel SEAPI -#### Example of tool run: -`python ~/tools/IntelSEAPI/runtool/sea_runtool.py -o trace -f gt ! ./benchmark_app -niter 1 -nireq 1 -nstreams 1 -api sync -m ./resnet-50-pytorch/resnest-50-pytorch.xml` +#### Example of running the tool: + +```sh +python ~/tools/IntelSEAPI/runtool/sea_runtool.py -o trace -f gt ! ./benchmark_app -niter 1 -nireq 1 -nstreams 1 -api sync -m ./resnet-50-pytorch/resnest-50-pytorch.xml +``` #### Mandatory parameters: * -o trace – output file name @@ -34,8 +37,11 @@ Generated file can be opened with google chrome using "chrome://tracing" URL. ### Intel Vtune Profiler -#### Example of tool run: -`vtune -collect hotspots -k sampling-mode=hw -k enable-stack-collection=true -k stack-size=0 -k sampling-interval=0.5 -- ./benchmark_app -nthreads=1 -api sync -niter 1 -nireq 1 -m ./resnet-50-pytorch/resnet-50-pytorch.xml` +#### Example of running the tool: + +```sh +vtune -collect hotspots -k sampling-mode=hw -k enable-stack-collection=true -k stack-size=0 -k sampling-interval=0.5 -- ./benchmark_app -nthreads=1 -api sync -niter 1 -nireq 1 -m ./resnet-50-pytorch/resnet-50-pytorch.xml +``` #### Mandatory parameters: * -collect hotspots @@ -49,9 +55,9 @@ Generated file can be opened with Vtune client. Use API defined in [openvino/itt](https://docs.openvinotoolkit.org/latest/itt_2include_2openvino_2itt_8hpp.html) module. 
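As a sketch of that pattern (the `OV_ITT_DOMAIN` and `OV_ITT_SCOPED_TASK` macros come from `openvino/itt.hpp`; the domain and task names below are hypothetical):

```cpp
#include <openvino/itt.hpp>

namespace {
// A domain groups related counters in the resulting trace; the name here is
// illustrative only.
OV_ITT_DOMAIN(MyPluginDomain);
}  // namespace

void prepare_weights() {
    // Opens a task on entry and closes it automatically on scope exit, so the
    // region shows up as a counter in VTune / IntelSEAPI traces.
    OV_ITT_SCOPED_TASK(MyPluginDomain, "prepare_weights");
    // ... workload to be measured ...
}
```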
## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
 * [OpenVINO GPU Plugin](../README.md)
 * [Developer documentation](../../../../docs/dev/index.md)
-
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/docs/runtime_parameters_cache.md b/src/plugins/intel_cpu/docs/runtime_parameters_cache.md
index 85ccea276cb2b0..5eee9fcac20cc0 100644
--- a/src/plugins/intel_cpu/docs/runtime_parameters_cache.md
+++ b/src/plugins/intel_cpu/docs/runtime_parameters_cache.md
@@ -1,22 +1,30 @@
-# CPU plugin runtime parameters cache
+# CPU Plugin Runtime Parameters Cache
 
 ## Checklist for the runtime cache implementation
-1. Determine what data will be cached. We usually use the Executor concept that represents a junction of the executable code, usually JIT generated kernel, with some precomputed algorithm parameters.
-2. Provide a key that uniquelly identifies the cached value as a funtion of dynamically changing parameters, i.e. shapes, dynamic input that determines the algorithm parameters, etc. To be used in a hash table, the key must have the following static interface:
+
+1. Determine what data will be cached. It is commonly recommended to use the Executor concept that represents a junction of the executable code, usually a JIT generated kernel, with some precomputed algorithm parameters.
+
+2. Provide a key that uniquely identifies the cached value as a function of dynamically changing parameters, that is, shapes, dynamic input that determines the algorithm parameters, etc. To be used in a hash table, the key must have the following static interface:
 ```cpp
 struct KeyType {
     size_t hash() const;
     bool operator== () const;
 };
 ```
-3. Provide a builder, that is, a callable object of the following signature:
+
+3. Provide a builder, that is, a callable object of the following signature:
 ```cpp
 ValueType build(const KeyType& key);
 ```
-   The `ValueType` is a type to be cached (e.g. shared pointer to Executor object). Remember that in the current cache implementation, a default constructed `ValueType()` object is considered empty, so it is better to use `std::shared_ptr` as the `ValueType`. The builder instantiates a specific type of cached entity from the `key`, thus the `key` completely defines the cached data. The builder is used to creat the `ValueType` object in case of cache miss.
-4. Refactor the specific implementation of the `prepareParams()` method to extract the cached object construction logic (e.g. the algorithm parameters recalculation and JIT kernel generation) into the builder.
+   The `ValueType` is a type to be cached (for example, a shared pointer to an Executor object). Remember that in the current cache implementation, a default constructed `ValueType()` object is considered empty. Therefore, it is better to use `std::shared_ptr` as the `ValueType`. The builder instantiates a specific type of cached entity from the `key`, so the `key` completely defines the cached data. The builder is used to create the `ValueType` object in case of a cache miss.
+
+4. Refactor the specific implementation of the `prepareParams()` method to extract the cached object construction logic (for example, the algorithm parameters recalculation and JIT kernel generation) into the builder.
+
 5. Add the key generation code into the `prepareParams()` method to query the cache.
-6. Implement cache usage as the following:
+
+6. Implement cache usage as follows:
 ```cpp
 void prepareParams() override {
     ... //code that prepares parameters for the key
@@ -31,6 +39,7 @@
     execPtr = result.first;
 }
 ```
+
 7. To provide smoke testing of these changes, add repeated shapes to the "target shapes" part of the corresponding single layer test definition:
 ```cpp
 { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
@@ -38,7 +47,7 @@
      {{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}, {10, 10, 5}}} // input 1
 },
 ```
-   It worth to mention that placing two identical target shapes one after another does not trigger the cache, since another optimization based on the fact that the shapes have not been changed takes place. For example, the following test definition does not properly test the cache:
+   **Note that placing two identical target shapes one after another does not trigger the cache,** since another optimization based on the fact that the shapes have not been changed takes place. For example, the following test definition does not properly test the cache:
 ```cpp
 { // the shape infer and params preparation stages will be skipped for the second target shapes combination since the shapes are not changed
     {{-1, -1, -1}, {{5, 5, 5}, {5, 5, 5}}}, // input 0
@@ -47,6 +56,7 @@
 ```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/README.md b/src/plugins/intel_gpu/README.md
index 40d33a173c49e0..d8b81154f9368e 100644
--- a/src/plugins/intel_gpu/README.md
+++ b/src/plugins/intel_gpu/README.md
@@ -4,7 +4,7 @@ GPU plugin in [OpenVINO toolkit](https://github.com/openvinotoolkit/openvino) su
 
 ## Key Contacts
 
-Please contact a member of [openvino-ie-gpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-gpu-maintainers) group, for assistance regarding GPU.
+For assistance regarding GPU, contact a member of [openvino-ie-gpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-gpu-maintainers) group.
 
 ## Components
 
@@ -32,29 +32,32 @@ GPU Plugin contains the following components:
 * [GPU plugin unit test](./docs/gpu_plugin_unit_test.md)
 
 ## Attached licenses
+
 GPU plugin uses 3rd-party components licensed under following licenses:
 - *googletest* under [Google License](https://github.com/google/googletest/blob/master/googletest/LICENSE)
 - *OpenCL™ ICD and C++ Wrapper under [Khronos™ License](https://github.com/KhronosGroup/OpenCL-CLHPP/blob/master/LICENSE.txt)
 - *RapidJSON* under [Tencent License](https://github.com/Tencent/rapidjson/blob/master/license.txt)
 
 ## Support
-Please report issues and suggestions
-[GitHub issues](https://github.com/openvinotoolkit/openvino/issues).
+
+To report issues and make suggestions, see [GitHub issues](https://github.com/openvinotoolkit/openvino/issues).
 
 ## How to Contribute
-We welcome community contributions to GPU plugin. If you have an idea how to improve the library:
+
+Community contributions to GPU plugin are highly welcome. 
If you have a suggestion on how to improve the library: - Share your proposal via [GitHub issues](https://github.com/openvinotoolkit/openvino/issues) - Ensure you can build the product and run all the tests with your patch -- In the case of a larger feature, create a test +- In case of a larger feature, create a test - Submit a [pull request](https://github.com/openvinotoolkit/openvino/pulls) We will review your contribution and, if any additional fixes or modifications -are necessary, may provide feedback to guide you. When accepted, your pull -request will be merged into our GitHub repository. +are necessary, we may provide feedback to guide you. Once your pull request +has been approved, it will be merged into our GitHub repository. ## System Requirements + GPU plugin supports Intel® HD Graphics, Intel® Iris® Graphics and Intel® Arc™ Graphics and is optimized for Gen9-Gen12LP, Gen12HP architectures GPU plugin currently uses OpenCL™ with multiple Intel OpenCL™ extensions and requires Intel® Graphics Driver to run. diff --git a/src/plugins/intel_gpu/docs/basic_data_structures.md b/src/plugins/intel_gpu/docs/basic_data_structures.md index 087ea86b4b0d0d..a11f8ab666ab6a 100644 --- a/src/plugins/intel_gpu/docs/basic_data_structures.md +++ b/src/plugins/intel_gpu/docs/basic_data_structures.md @@ -1,4 +1,4 @@ -# Basic data structures of GPU graph and overall flow +# Basic Data Structures of GPU Graph and Overall Flow ## Overall graph data structure @@ -60,23 +60,23 @@ d1 ..> d2 : Dependency ``` There are three levels of abstraction in the graph structures being used in the gpu plugin : *topology*, *program*, *network*.
-The above figure presents the overall data structures. +The above figure presents the overall data structures. -First, the original model should be presented as a corresponding *topology*, which is consisting of primitives and their connections. It can be regarded as a simple graph structure representing the original model. +First, the original model should be presented as a corresponding *topology*, which consists of primitives and their connections. It can be regarded as a simple graph structure representing the original model. -Then the topology is to be converted to a *program*, which is consisting of *program_nodes* corresponding to the original primitives and their connections. +Then the topology is to be converted to a *program*, which consists of *program_nodes* corresponding to the original primitives and their connections. Here, the majority of the transformation and optimizations are performed on the *program*. -Also, the *primitive_impl* is created for each *program_node* at this stage, which holds the selected kernels for each *program_node* and the required information to run the kernels such as work group sizes and kernel arguments, etc. The final source code of the kernels are decided and compiled at this stage, too. -Note that a *program* is common for the streams, i.e., there is only one *program* created for all the streams. +Also, the *primitive_impl* is created for each *program_node* at this stage, which holds the selected kernels for each *program_node* and the required information to run the kernels, such as work group sizes and kernel arguments, etc. The final source code of the kernels is decided and compiled at this stage, too. +Note that a *program* is common for the streams, that is, there is only one *program* created for all the streams. -Once the *program* is finalized, then the *network* is built from the *program* for each stream. -A *network* is consisting of primitive instances (a.k.a *primitive_inst*) that contains the required memory allocations for the kernels. -Then finally we can run the *network* by running the network::execute(). +Once the *program* is finalized, the *network* is built from the *program* for each stream. +A *network* consists of primitive instances (*primitive_inst*) that contain the required memory allocations for the kernels. +Finally, you can run the *network* using the `network::execute()` method. -The more detailed description of each component is to be described in the below sections. +A more detailed description of each component is described in the sections below. -## primitive +## primitive ```cpp struct primitive { ... @@ -87,16 +87,16 @@ struct primitive { ... }; ``` -A *primitive* is the primary representation of an operation in gpu plugin, which comprises a graph structure, i.e., the *topology*. A *primitive* is to be created for a layer operation in the original model and holds the basic information about the operation, such as required input, output, attributes, as well as its own id, a.k.a *primitive_id*. Here, the *primitive_id* is a unique string id assigned to each *primitive* throughout the processing.
+A *primitive* is the primary representation of an operation in GPU plugin, which comprises a graph structure, that is, the *topology*. A *primitive* is to be created for a layer operation in the original model and holds the basic information about the operation, such as required input, output, attributes, as well as its own id (*primitive_id*). Here, the *primitive_id* is a unique string id assigned to each *primitive* throughout the processing.
-The APIs of the available primitives can be found [here](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/include/intel_gpu/primitives).
+See the APIs of the available [primitives](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/include/intel_gpu/primitives).
-An example creation of a arg_max_min primitive: +An example creation of a `arg_max_min` primitive: ```cpp cldnn::arg_max_min top_k_prim = cldnn::arg_max_min("top_k", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, "", padding(), data_types::f32); ``` -In GPU plugin, the *primitives* are converted from ngraph operations, which can be found [here](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/src/plugin/ops). +In GPU plugin, the *primitives* are converted from ngraph [operations](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/src/plugin/ops). ## topology ```cpp @@ -107,9 +107,9 @@ struct topology{ }; ``` -A *topology* is a graph structure consisting of *primitives* and their connections. Here a connection is defined by input primitives assigned to a primitive. +A *topology* is a graph structure consisting of *primitives* and their connections. Here, a connection is defined by input primitives assigned to a primitive. -A simple example of creation of a topology, which is consisting of two poolings, one concatenation of the poolings, and a reorder primitive, is shown as follows: +A simple example of creating a topology, which consists of two poolings, one concatenation of the poolings, and a reorder primitive, is as follows: ```cpp auto input0 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 8, 3}}); auto input1 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 8, 3}}); @@ -127,9 +127,9 @@ topology topology(input_layout("input0", input0->get_layout()), reorder("reorder", "concat", reorder_layout)); ``` -In the above example, "pool0" is the *primitive_id* of the first pooling, and "input0" is the *primitive_id* of the input primitive of it. The latter parameters such as pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1} are other properties for pooling primitive, pooling_mode, tensor size, stride, respectively. +In the example above, "pool0" is the *primitive_id* of the first pooling, and "input0" is the *primitive_id* of the input primitive of it. The `pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}` parameters stand for other properties for pooling: primitive, pooling_mode, tensor size, stride, respectively. -Note that topology is created from ngraph representation in the gpu plugin. Manual definition of a topology shown in the above snippet is usually for unittest purpose. +Note that topology is created from ngraph representation in the GPU plugin. Manual definition of a topology shown in the snippet above is usually for the purpose of a unit test. ## program_node (impl) @@ -147,14 +147,15 @@ struct program_node { ... }; ``` -A program is consisting of program_nodes which are created from primitives. ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L353)) A program_node is created by a factory for each primitive type, i.e., primitive_type, which is associated to each primitive as type ([link](https://github.com/openvinotoolkit/openvino/blob/173f328c53d39dd42ecdb9de9e04f9d2c266683f/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp#L79)). (Note that this primitive_type is used to create primitive_inst or call choose_impl too.) +A program consists of *program_nodes* which are created from primitives. 
([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L353)) A *program_node* is created by a factory for each *primitive type*, that is, *primitive_type*, which is associated to each primitive as a type ([link](https://github.com/openvinotoolkit/openvino/blob/173f328c53d39dd42ecdb9de9e04f9d2c266683f/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp#L79)). Note that this *primitive_type* is used to create *primitive_inst* or call *choose_impl* too.
 
-Basically a program_node holds the following information which is to be decided throughout the transformation / optimization processes in a program:
-* layout : output layout of a program_node. ([impl](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp))
-* dependencies : a list of program_nodes whose outputs are used by the current program_node as the inputs
-* memory dependencies : a list of program_nodes, the live ranges of the outputs of them overlaps with that of the current program_node
-* fused operations : fused operations to the current program_node
-* selected impl : The primitive_impl object which holds the information for the selected kernel required to run it, such as the selected kernels, work group size, etc. Also this object has the methods to set kernel arguments for a primitive_inst and execute the kernel by enqueueing it to the command queue.
+A *program_node* holds the following information which is to be decided throughout the transformation / optimization processes in a program:
+
+* layout: output layout of a *program_node*. ([impl](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp))
+* dependencies: a list of *program_nodes*, the outputs of which are used by the current *program_node* as the inputs
+* memory dependencies: a list of *program_nodes*, the live ranges of whose outputs overlap with that of the current *program_node*
+* fused operations: fused operations to the current *program_node*
+* selected impl: The *primitive_impl* object which holds the information for the selected kernel required to run it, such as the selected kernels, work group size, etc. Also, this object has the methods to set kernel arguments for a *primitive_inst* and execute the kernel by enqueueing it to the command queue.
 
 ## program (impl)
 
```cpp
struct program {
...
};
```
 The major tasks that are done while building a program are as follows: ([ref](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L433))
-* Init graph : Create an initial program consisting of program_nodes built from a given topology
-* Optimization (Major optimizations will be dealt with from another section TBD)
-    * pre-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L474)): Optimizations done before graph_compilation. Notable passes are as follows:
-        * prepare_primitive_fusing : decision of fusing
-        * reorder_inputs : decision of preferred layout / impl (ocl vs onednn) and adding reorders w.r.t the decision
-    * post-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L437)) Optimizations done after graph_compilation
- * post_optimize_weights : Add reorder for the weights toward preferred formats (as generic nodes)
- * propagate_constants : Transfer and reorder original weight data to the generic_nodes created at post_optimize_weights. Here, note that the constant propagation is doing weight reorder by running actual network (w/ is_internal = true). To this end, a temporal program is created/built/run within this pass.
-
-* Kernel selection and graph compilations ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L436)) : Select best kernel for the program_node and create the impl (i.e., primitive_impl), and collect the kernel source code strings to the kernels_cache.
+* Init graph: Create an initial program consisting of *program_nodes* built from a given topology.
+* Optimization (Major optimizations will be dealt with in another section, TBD)
+    * pre-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L474)): Optimizations done before *graph_compilation*. Notable passes are as follows:
+        * *prepare_primitive_fusing*: decision of fusing
+        * *reorder_inputs*: decision of preferred layout / impl (ocl vs onednn) and adding reorders w.r.t. the decision
+    * post-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L437)): Optimizations done after *graph_compilation*
+ * *post_optimize_weights*: Add reorder for the weights toward preferred formats (as generic nodes)
+ * *propagate_constants*: Transfer and reorder original weight data to the *generic_nodes* created at *post_optimize_weights*. Note that the constant propagation is doing a weight reorder by running the actual network (w/ is_internal = true). To this end, a temporal program is created/built/run within this pass.
+ +* Kernel selection and graph compilations ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L436)): Select best kernel for the *program_node* and create the impl (that is, *primitive_impl*), and collect the kernel source code strings to the kernels_cache. * Kernel compilation ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L451)): JIT compilation of the collected kernels. Currently 9 kernels are combined as a batch and compiled at a time. Also the batches are compiled in parallel. See [here](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp#L400). ## primitive_inst (impl) @@ -203,12 +204,12 @@ class primitive_inst { ... }; ``` -Once all processing at a program level is finished, a network is to be built from the program. -primitive_inst is the basic component comprising a network. -While each primitive_inst object is still associated to the corresponding program_node, it holds the required memory objects such as output memory objects and intermediate memory objects that are to be used by that node. A brief description for the two kinds of memory allocated for a primitive_inst is as follows: +Once all processing at a program level has been finished, a network is to be built from the program. +The *primitive_inst* is the basic component comprising a network. +While each *primitive_inst* object is still associated with the corresponding *program_node*, it holds the required memory objects, such as output memory objects and intermediate memory objects that are to be used by that node. A brief description of the two kinds of memory allocated for a *primitive_inst* is as follows: -* output memory : An output memory of a primitive_inst is allocated at the creation of each primitive_inst ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L210)), unless its output is reusing the input memory or the node is a mutable data to be used as a 2nd output. The general output tensors are allocated by the memory pool, so that the memory could be reused by other nodes when it is not needed. (Note that constants data are not reusable and should retain the own memory, so that they could be shared by multiple streams. More descriptions about memory pool will be given by dedicated section (TBD)). -* intermediate memory ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L215)): Some kernels requires intermediate memories in addition to the input/output memories such as [detection_output](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp#L155). The allocation happens after all primitive_insts are finished ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L592)), since it needs to be processed in a processing_order to use the predecessors' allocation information while the creation of primitive_inst is done in a order sorted by memory_size. 
+* output memory: An output memory of a *primitive_inst* is allocated at the creation of each *primitive_inst* ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L210)), unless its output is reusing the input memory or the node is a mutable data to be used as a second output. The general output tensors are allocated by the memory pool, so that the memory could be reused by other nodes when it is not needed. Note that constants data is not reusable and should retain its own memory so that it could be shared by multiple streams. A more detailed description of the memory pool will be given in the dedicated section (TBD). +* intermediate memory ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L215)): Some kernels require intermediate memories in addition to the input/output memories such as [detection_output](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp#L155). The allocation happens after all *primitive_insts* are finished ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L592)), since it needs to be processed in a *processing_order* to use the predecessors' allocation information while the creation of *primitive_inst* is done in an order sorted by *memory_size*. ## network (impl) ```cpp @@ -230,14 +231,15 @@ struct network { void allocate_primitives(); }; ``` -When a network is built, the comprising primitives are allocated and dependencies among them are set ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L259)). +When a network is built, the comprising primitives are allocated and dependencies among them are set ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L259)). + +The major processes, done while a network is executed, are as follows ([impl]( https://github.com/openvinotoolkit/openvino/blob/3de428c7139fef69e37b406c3490c26b67b48026/src/plugins/intel_gpu/src/graph/network.cpp#L663)): +* set arguments of the primitives (that is, set the [kernel_args](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp) required for running the kernels such as input/output memory address) -The major processes done while a network is executed are as follows ([impl]( https://github.com/openvinotoolkit/openvino/blob/3de428c7139fef69e37b406c3490c26b67b48026/src/plugins/intel_gpu/src/graph/network.cpp#L663)) : -* set arguments of the primitives (i.e., set the [kernel_args](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp) required for running the kernels such as input/output memory address) +* [execute primitives](https://github.com/openvinotoolkit/openvino/blob/3de428c7139fef69e37b406c3490c26b67b48026/src/plugins/intel_gpu/src/graph/network.cpp#L849): Execute each primitive, that is, enqueue the kernels to the context queue. 
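As a condensed, hedged sketch of this flow in the unit-test style used earlier in this document (header paths, the engine argument, and the primitive names are assumptions for illustration, not the exact plugin API):

```cpp
#include <intel_gpu/graph/network.hpp>
#include <intel_gpu/graph/topology.hpp>
#include <intel_gpu/primitives/activation.hpp>
#include <intel_gpu/primitives/input_layout.hpp>

using namespace cldnn;

// Builds a one-primitive topology, compiles it into a program/network pair,
// and runs a single execution; outputs are keyed by primitive_id.
std::map<primitive_id, network_output> run_once(engine& eng, memory::ptr input_mem) {
    topology topo(input_layout("input", input_mem->get_layout()),
                  activation("relu", "input", activation_func::relu));
    network net(eng, topo);                  // the program is built here
    net.set_input_data("input", input_mem);  // binds user memory to the input
    return net.execute();                    // set_arguments + enqueue kernels
}
```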
 
-## See also
+
+## See Also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/docs/execution_of_inference.md b/src/plugins/intel_gpu/docs/execution_of_inference.md
index 66a2a2ed2de913..6433ca633efe96 100644
--- a/src/plugins/intel_gpu/docs/execution_of_inference.md
+++ b/src/plugins/intel_gpu/docs/execution_of_inference.md
@@ -1,31 +1,33 @@
 # Execution of Inference
 
-Network execution happens when user calls `inferRequest->infer()` or `inferRequest->start_async()`. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/samples/cpp/benchmark_app/main.cpp#L929)
+Network execution is triggered when the `inferRequest->infer()` or `inferRequest->start_async()` methods are called. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/samples/cpp/benchmark_app/main.cpp#L929)
 
-In high level, all we need to do is enqueuing OCL kernels with buffers. For that purpose, we need to find the `cldnn::network` instance as it contains the required buffers for execution. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/basic_data_structures.md#network-impl) `CPUStreamExecutor` is holding streams and the stream corresponds to the `cldnn::network` structure. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/inference/src/threading/ie_cpu_streams_executor.cpp#L263)
+At a high level, all that is required is to enqueue OCL kernels with buffers. For that purpose, you need to find the `cldnn::network` instance, as it contains the required buffers for execution. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/basic_data_structures.md#network-impl) `CPUStreamExecutor` holds streams, and a stream corresponds to the `cldnn::network` structure. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/inference/src/threading/ie_cpu_streams_executor.cpp#L263)
 
 The main body of network execution is `cldnn::network::execute_impl`. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L663) In this function, `set_arguments()` is called to set OpenCL arguments and `execute_primitive` is called to enqueue kernels to OCL queue.
-In case of synchronous API call(i.e. `inferRequest->infer()`), waiting for completion of kernels is also required. It is called from `cldnn::network_output::get_memory()` function. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp#L31)
+In case of a synchronous API call (that is, `inferRequest->infer()`), waiting for the completion of kernels is also required. The wait is triggered from the `cldnn::network_output::get_memory()` function. 
[(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp#L31) ## Optimized-out node + During graph compilation [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/graph_optimization_passes.md), some nodes may be optimized out. -For example, concat operation may be executed _implicitly_, or in other words, concat may be _optimized out_. Implicit concat is possible when the input of concat can put the output tensor directly into the result tensor of concat. +For example, concat operation may be executed _implicitly_, or in other words, concat may be _optimized out_. Implicit concat is possible when the input of concat can put the output tensor directly into the resulting tensor of concat. -In such case, we don't remove the node in the graph for integrity of node connection. Concat layer is just marked as **optimized-out** and not executed during runtime. [(src)](https://github.com/openvinotoolkit/openvino/blob/dc6e5c51ee4bfb8a26a02ebd7a899aa6a8eeb239/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp#L155) +In such a case, the node is not removed from the graph, to keep the node connections intact. The concat layer is just marked as **optimized-out** and not executed during runtime. [(src)](https://github.com/openvinotoolkit/openvino/blob/dc6e5c51ee4bfb8a26a02ebd7a899aa6a8eeb239/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp#L155) ## Dumping layer in/out buffer during execution -`cldnn::network::execute_impl` also contains some logic to dump layer in/out buffers for debugging purpose. As it is related to memory usage, it deserves some description, too. +The `cldnn::network::execute_impl` function also contains some logic to dump layer in/out buffers for debugging purposes. As it is related to memory usage, it deserves some description, too. -In order to dump buffers, we need to wait for the moment that the kernel is about to be called(for source buffer) or just called(for destination buffer). In other moments, we don't have the layer's buffer as the buffers are reused from memory pool. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md#memory-dependency-and-memory-pool) +To dump buffers, you need to wait for the moment when the kernel is about to be called (for the source buffer) or has just been called (for the destination buffer). At other moments, the layer's buffer is not available, as the buffers are reused from the memory pool. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md#memory-dependency-and-memory-pool) -`get_stream().finish()` is called firstly as we need to be synchronous with kernel execution. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L712) Then we can access the buffer. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L114) This access varies depending on the kind of buffer. If it is `usm_host` or `usm_shared`, it is just accessed directly. If it is `usm_device`, it is accessed after copying the data into host memory because host cannot access `usm_device` directly.
[(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L312) If it is ocl memory, we map this into host memory. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L46) +The `get_stream().finish()` function is called first, as the dump needs to be synchronized with kernel execution. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L712). Then, you can access the buffer. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L114). This access varies depending on the kind of buffer. If it is `usm_host` or `usm_shared`, it is just accessed directly. If it is `usm_device`, it is accessed after copying the data into host memory because the host cannot access `usm_device` directly. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L312) If it is OCL memory, it is mapped into host memory. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L46) Typical network execution happens with `usm_host` for network input and output and `usm_device` for the buffers inside the network. -For usage of this dumping feature, please see [link](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#layer-inout-buffer-dumps). +For usage of this dumping feature, see this [link](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#layer-inout-buffer-dumps). ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_debug_utils.md b/src/plugins/intel_gpu/docs/gpu_debug_utils.md index 1acc57867782c5..33c07a06cc4280 100644 --- a/src/plugins/intel_gpu/docs/gpu_debug_utils.md +++ b/src/plugins/intel_gpu/docs/gpu_debug_utils.md @@ -1,21 +1,23 @@ -# GPU plugin debug utils +# GPU Plugin Debug Utils This document is a list of useful debug features / tricks that might be used to find root cause of performance / functional issues. Some of them are available by default, but some others might require plugin recompilation. ## Debug Config -`Debug_config` is an infra structure that contains number of easy-to-use debugging features. It has various control parameters. You can check list of parameters from the source code `cldnn::debug_configuration`. + +`Debug_config` is an infrastructure that contains several easy-to-use debugging features. It has various control parameters, which you can check from the source code `cldnn::debug_configuration`. ### How to use it -First, this feature should be enabled from cmake configuration `ENABLE_DEBUG_CAPS`. When openvino is released, it is turned off by default. -The parameters should be set from environment variable when calling inference engine API. + +First, this feature should be enabled from the cmake configuration `ENABLE_DEBUG_CAPS`. It is turned off by default in OpenVINO releases. +The parameters should be set via environment variables when calling the inference engine API.
``` $ OV_GPU_Verbose=1 ./benchmark_app ... # Run benchmark_app with OV_GPU_Verbose option $ OV_GPU_DumpLayersPath="cldnn/" ./benchmark_app ... # Run benchmark_app and store intermediate buffers into cldnn/ directory. ``` -For Windows OS, please use below syntax. +For Windows OS, use the following syntax: ``` Windows Power Shell: @@ -28,38 +30,42 @@ Windows cmd.exe: ``` ### Options syntax + Plugin is able to parse different naming styles for debug options: 1. `OV_GPU_SOME_OPTION` 2. `OV_GPU_SomeOption` Behavior when both versions are specified is not defined. -Some options also allow multiple prefixes: `OV` and `OV_GPU`. `OV` prefix is intended to be used for options common for all OpenVINO components. In case if an option is set twice with different prefixes, then `OV_GPU` has higher priority. - -### List of parameters (There are actually more than this, please see OV_GPU_Help result) - -* `OV_GPU_Help`: Show help message of debug config. -* `OV_GPU_Verbose`: Verbose execution. Currently, Verbose=1 and 2 are supported. -* `OV_GPU_PrintMultiKernelPerf`: Print kernel latency for multi-kernel primitives. This is turned on by setting 1. Execution time is printed. -* `OV_GPU_DisableUsm`: Disable the usage of usm (unified shared memory). This is turned on by setting 1. -* `OV_GPU_DisableOnednn`: Disable onednn for discrete GPU (no effect for integrated GPU) -* `OV_GPU_DumpGraphs`: Dump optimized graph into the path that this variable points. This is turned on by setting the destination path into this variable. -* `OV_GPU_DumpSources`: Dump opencl sources -* `OV_GPU_DumpLayersPath`: Enable intermediate buffer dump and store the tensors. This is turned on by setting the destination path into this variable. You can check the exact layer name from `OV_GPU_Verbose=1`. -* `OV_GPU_DumpLayers`: Dump intermediate buffers only for the layers that this variable specifies. Multiple layers can be specified with space delimiter. Dump feature should be enabled through `OV_GPU_DumpLayersPath` -* `OV_GPU_DumpLayersResult`: Dump output buffers of result layers only -* `OV_GPU_DumpLayersDstOnly`: When dumping intermediate buffer, dump destination buffer only. This is turned on by setting 1. -* `OV_GPU_DumpLayersLimitBatch`: Limit the size of batch to dump -* `OV_GPU_DryRunPath`: Dry run and serialize execution graph into the specified path -* `OV_GPU_BaseBatchForMemEstimation`: Base batch size to be used in memory estimation -* `OV_GPU_AfterProc`: Run inference after the specified process PIDs are finished, separated by space. Supported on only on linux. -* `OV_GPU_SerialCompile`: Serialize creating primitives and compiling kernels -* `OV_GPU_ForceImplType`: Force implementation type of a target primitive or layer. [primitive or layout_name]:[impl_type] For primitives, fc:onednn, fc:ocl, do:cpu, do:ocl, reduce:ocl and reduce:onednn are supported -* `OV_GPU_MaxKernelsPerBatch`: Maximum number of kernels in a batch during compiling kernels +Some options also allow multiple prefixes: `OV` and `OV_GPU`. `OV` prefix is intended to be used for options common for all OpenVINO components. When an option is set twice with different prefixes, then `OV_GPU` has higher priority. + +### List of parameters + +This is a part of the full list. To get all parameters, see OV_GPU_Help result. + +* `OV_GPU_Help`: Shows help message of debug config. +* `OV_GPU_Verbose`: Verbose execution. Currently, `Verbose=1` and `2` are supported. +* `OV_GPU_PrintMultiKernelPerf`: Prints kernel latency for multi-kernel primitives. 
This is turned on by setting `1`. Execution time is printed. +* `OV_GPU_DisableUsm`: Disables the usage of usm (unified shared memory). This is turned on by setting `1`. +* `OV_GPU_DisableOnednn`: Disables oneDNN for discrete GPU (no effect for integrated GPU). +* `OV_GPU_DumpGraphs`: Dumps an optimized graph into the path that this variable points to. This is turned on by setting the destination path into this variable. +* `OV_GPU_DumpSources`: Dumps OpenCL sources. +* `OV_GPU_DumpLayersPath`: Enables intermediate buffer dump and stores the tensors. This is turned on by setting the destination path into this variable. You can check the exact layer name from `OV_GPU_Verbose=1`. +* `OV_GPU_DumpLayers`: Dumps intermediate buffers only for the layers that this variable specifies. Multiple layers can be specified with a space delimiter. Dump feature should be enabled through `OV_GPU_DumpLayersPath`. +* `OV_GPU_DumpLayersResult`: Dumps output buffers of result layers only. +* `OV_GPU_DumpLayersDstOnly`: When dumping intermediate buffer, dumps destination buffer only. This is turned on by setting `1`. +* `OV_GPU_DumpLayersLimitBatch`: Limits the size of a batch to dump. +* `OV_GPU_DryRunPath`: Dry runs and serializes the execution graph into the specified path. +* `OV_GPU_BaseBatchForMemEstimation`: Base batch size to be used in memory estimation. +* `OV_GPU_AfterProc`: Runs inference after the specified process PIDs are finished, separated by space. Supported only on Linux. +* `OV_GPU_SerialCompile`: Serializes creating primitives and compiling kernels. +* `OV_GPU_ForceImplType`: Forces the implementation type of a target primitive or layer. The format is `[primitive or layout_name]:[impl_type]`. For primitives, `fc:onednn`, `fc:ocl`, `do:cpu`, `do:ocl`, `reduce:ocl` and `reduce:onednn` are supported. +* `OV_GPU_MaxKernelsPerBatch`: Maximum number of kernels in a batch during compiling kernels. ## Dump execution graph -The execution graph (also known as runtime graph) is a device specific graph after all transformations applied by the plugin. It's a very useful -feature for performance analysis and it allows to find a source of performance regressions quickly. Execution graph can be retrieved from the plugin + +The execution graph (also known as a runtime graph) is a device-specific graph after all transformations applied by the plugin. It is a very useful +feature for performance analysis and it allows finding a source of performance regressions quickly. The execution graph can be retrieved from the plugin using `GetExecGraphInfo()` method of `InferenceEngine::ExecutableNetwork` and then serialized as usual IR: ```cpp ExecutableNetwork exeNetwork; @@ -68,8 +74,8 @@ using `GetExecGraphInfo()` method of `InferenceEngine::ExecutableNetwork` and th execGraphInfo.serialize("/path/to/serialized/exec/graph.xml"); ``` -The capability to retrieve execution graph and store it on the disk is integrated into `benchmark_app`. The execution graph can be simply dumped -by setting additional parameter `-exec_graph_path exec_graph.xml` for `benchmark_app`. +The capability to retrieve the execution graph and store it on the disk is integrated into `benchmark_app`. The execution graph can be simply dumped +by setting an additional parameter `-exec_graph_path exec_graph.xml` for `benchmark_app`.
Output `xml` file has a format similar to usual IR, but contains execution nodes with some runtime info such as: - Execution time of each node - Mapping between nodes in final device specific graph and original input graph operations @@ -78,7 +84,7 @@ execution nodes with some runtime info such as: - Primitive type - Inference precision -Typical node in GPU execution graph looks as follows: +A typical node in the GPU execution graph looks as follows: ``` @@ -101,24 +107,24 @@ Typical node in GPU execution graph looks as follows: ``` -Most of the data here is very handy for the performance analysis. For example, for each node you can check that: -- Nodes fusion works as expected on given models (i.e. some node is missing in execution graph and it's name is a part of `originalLayersNames` list for some other node) +Most of the data here is very handy for performance analysis. For example, for each node you can check whether: +- Nodes fusion works as expected on given models (that is, some node is missing in the execution graph and its name is a part of `originalLayersNames` list for some other node) - Input and output layouts of a node are optimal in each case - Input and output precisions are valid in each case -- The node used expected kernel for execution -- And the most important: actual execution time of each operation +- The node used the expected kernel for execution +- And most important: the actual execution time of each operation This graph can be visualized using Netron tool and all these properties can be analyzed there. -Note: execution time collection for each primitive requires `CONFIG_KEY(PERF_COUNT)` to be enabled (`benchmark_app` does it automatically), thus the overall model execution time is usually much worse in such use cases. +> **NOTE**: Execution time collection for each primitive requires `CONFIG_KEY(PERF_COUNT)` to be enabled (`benchmark_app` does it automatically). Therefore, the overall model execution time is usually much worse in such use cases. ## Performance counters -This feature is a simplified version of execution graph as it provides much less information, but it might be more suitable for quick analysis and some kind of +This feature is a simplified version of the execution graph as it provides much less information, but it might be more suitable for quick analysis and some kind of processing with scripts. Performance counters can be retrieved from each `InferenceEngine::InferRequest` object using `getPerformanceCounts()` method. This feature is also integrated -into `benchmark_app` and the counters can be printed to cout using `-pc` parameter. +into `benchmark_app` and the counters can be printed to `cout` using the `-pc` parameter. The format looks as follows: @@ -135,17 +141,16 @@ relu OPTIMIZED_OUT layerType: ReLU realTime: 0 Total time: 53877 microseconds ``` -So it allows to quickly check execution time of some operation on the device and make sure that correct primitive is used. Also, the output can be easily -converted into .csv format and then used to collect any kind of statistics (e.g. execution time distribution by layer types). +So it allows you to quickly check the execution time of some operation on the device and make sure that the correct primitive is used. Also, the output can be easily converted into the *.csv* format and then used to collect any kind of statistics (for example, execution time distribution by layer types). ## Graph dumps -intel_gpu plugin allows to dump some info about intermediate stages in graph optimizer.
+The `intel_gpu` plugin allows you to dump some information about intermediate stages in the graph optimizer. -* You can dump graphs with `OV_GPU_DumpGraphs` of debug config. For the usage of debug config, please see [link](#debug-config). +* You can dump graphs with the `OV_GPU_DumpGraphs` option of debug config. For the usage of debug config, see the [link](#debug-config). -* Alternative, you can also enable the dumps from the application source code: clDNN plugin has the special internal config option `graph_dumps_dir` which can be set from the user app via plugin config: +* Alternatively, you can also enable the dumps from the application source code: clDNN plugin has a special internal config option, `graph_dumps_dir`, which can be set from the user app via plugin config: ```cpp Core ie; std::map device_config; @@ -153,7 +158,7 @@ device_config[CLDNN_CONFIG_KEY(GRAPH_DUMPS_DIR)] = "/some/existing/path/"; ie.SetConfig(device_config, "GPU"); ``` -For each stage it dumps: +For each stage, it dumps: ``` - cldnn_program_${program_id}_${stage_id}_${stage_name}.graph - graph saved in dot format which can be visualized via graphviz tool - cldnn_program_${program_id}_${stage_id}_${stage_name}.info - graph in text format @@ -162,16 +167,16 @@ For each stage it dumps: - ${program_id}_${stage_id}_${stage_name}.xml - graph in a format of execution graph ``` -Main graph usually has `program_id = 0`, graphs with other `program_id` values are usually created internally for constant propagation or some other purposes. +The main graph usually has `program_id = 0`. Graphs with other `program_id` values are usually created internally for constant propagation or some other purposes. ## Sources dumps -Since intel_gpu source tree contains only *templates* of the OpenCL™ kernels, it's quite important to get full kernels source code. +Since the `intel_gpu` source tree contains only *templates* of the OpenCL™ kernels, it is quite important to be able to get the full kernel source code. -* You can use `OV_GPU_DumpSources` of debug config. For the usage of debug config, please see [link](#debug-config). +* You can use the `OV_GPU_DumpSources` option of debug config. For the usage of debug config, see [link](#debug-config). * You can also dump OpenCL source code by changing OpenVINO source code: -clDNN plugin has the special internal config option `sources_dumps_dir` which can be set from the user app via plugin config: +clDNN plugin has a special internal config option, `sources_dumps_dir`, which can be set from the user app via plugin config: ```cpp Core ie; std::map device_config; @@ -184,12 +189,12 @@ When this key is enabled, the plugin dumps multiple files with the following nam clDNN_program_${program_id}_part_${bucket_id}.cl ``` -Note: `program_id` here might differ from `program_id` for the graph dumps as it's just a static counter for enumerating incoming programs. +> **Note**: `program_id` here might differ from `program_id` for the graph dumps, as it is just a static counter for enumerating incoming programs. -Each file contains a bucket of kernels that are compiled together. In case of any compilation errors, intel_gpu plugin will append compiler output -in the end of corresponding source file. +Each file contains a bucket of kernels that are compiled together. In case of any compilation errors, the `intel_gpu` plugin will append the compiler output +to the end of the corresponding source file.
-If you want to find some specific layer, then you'll need to use Debug/RelWithDebInfo build or modify base jitter method to append `LayerID` in release build: +To find a specific layer, use a Debug/RelWithDebInfo build or modify the base jitter method to append `LayerID` in the release build: ```cpp // inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp JitConstants KernelBase::MakeBaseParamsJitConstants(const base_params& params) const { @@ -200,19 +205,19 @@ JitConstants KernelBase::MakeBaseParamsJitConstants(const base_params& params) c } ``` -When source is dumped, it actually contains huge amount of macros(`#define`). For readability, you can run c preprocessor to apply the macros. +When the source is dumped, it contains a huge number of macros (`#define`). For readability, you can run the C preprocessor to expand the macros. `$ cpp dumped_source.cl > clean_source.cl` ## Layer in/out buffer dumps -In some cases you might want to get actual values in each layer execution to compare it with some reference blob. In order to do that we have -`OV_GPU_DumpLayersPath` option in debug config. For the usage of debug config, please see [link](#debug-config). +In some cases, you might want to get actual values in each layer execution to compare it with some reference blob. To do that, use the +`OV_GPU_DumpLayersPath` option in debug config. For the usage of debug config, see [link](#debug-config). -As a prerequisite, enable ENABLE_DEBUG_CAPS from cmake configuration. +As a prerequisite, enable `ENABLE_DEBUG_CAPS` from the cmake configuration. -Then, check runtime layer name by executing benchmark_app with OV_GPU_Verbose=1. It is better to be checked with this than through IR because this may be slightly different. OV_GPU_Verbose=1 will show log of execution of each layer. +Then, check the runtime layer name by executing *benchmark_app* with `OV_GPU_Verbose=1`. It is better to check the name with `OV_GPU_Verbose=1` than through the IR, because the runtime name may be slightly different. `OV_GPU_Verbose=1` will show the log of execution of each layer. ``` # As a prerequisite, enable ENABLE_DEBUG_CAPS from cmake configuration. @@ -221,30 +226,31 @@ export OV_GPU_DumpLayers="layer_name_to_dump1 layer_name_to_dump2" export OV_GPU_DumpLayersDstOnly=1 # Set as 1 when you want to dump dest buff only ``` -Dump files have the following naming: +Dump files are named in the following convention: ``` ${layer_name_with_underscores}_${src/dst}_${port_id}.txt ``` -Each file contains single buffer in common planar format (`bfyx`, `bfzyx` or `bfwzyx`) where each value is stored on a separate line. The first line in the file constains buffer description, e.g: +Each file contains a single buffer in a common planar format (`bfyx`, `bfzyx`, or `bfwzyx`), where each value is stored on a separate line. The first line in the file contains a buffer description, for example: ``` shape: [b:1, f:1280, x:1, y:1, z:1, w:1, g:1] (count: 1280, original format: b_fs_yx_fsv16) ``` -For accuracy troubleshoot, you may want to compare the GPU plugin result against CPU plugin result. For CPU dump, see [Blob dumping](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_cpu/src/docs/blob_dumping.md) +For troubleshooting the accuracy, you may want to compare the results of the GPU and CPU plugins.
For CPU dump, see [Blob dumping](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_cpu/src/docs/blob_dumping.md) -## Run int8 model on gen9 HW +## Run int8 model on Gen9 HW -As gen9 hw doesn't have hardware acceleration, low precision transformations are disabled by default, thus quantized networks are executed in full precision (fp16 or fp32) with explicit execution of quantize operations. -If you don't have gen12 HW, but want to debug network's accuracy or performance of simple operations (which doesn't require dp4a support), then you can enable low precision pipeline on gen9 using one of the following ways: -1. Add `{PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE, PluginConfigParams::YES}` option to the plugin config +As Gen9 HW does not have hardware acceleration, low-precision transformations are disabled by default. Therefore, quantized networks are executed in full precision (FP16 or FP32), with explicit execution of quantize operations. +If you do not have Gen12 HW, but want to debug the network's accuracy or performance of simple operations (which do not require dp4a support), then you can enable the low-precision pipeline on Gen9 with one of the following approaches: +1. Add `{PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE, PluginConfigParams::YES}` option to the plugin config. 2. Enforce `supports_imad = true` [here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp#L226) 3. Enforce `conf.enableInt8 = true` [here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/cldnn_engine.cpp#L366) -After that the plugin will run exactly the same scope of transformations as on gen12HW and generate similar kernels (small difference is possible due to different EUs count) +After that, the plugin will run exactly the same scope of transformations as on Gen12 HW and generate similar kernels (a small difference is possible due to a different EU count). ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_kernels.md b/src/plugins/intel_gpu/docs/gpu_kernels.md index 176300fa04c833..4c675717103177 100644 --- a/src/plugins/intel_gpu/docs/gpu_kernels.md +++ b/src/plugins/intel_gpu/docs/gpu_kernels.md @@ -1,18 +1,18 @@ -# GPU kernels implementation overview +# GPU Kernels Implementation Overview As mentioned in [GPU plugin structure](./source_code_structure.md), kernels for GPU plugin are located in `src/plugins/intel_gpu/src/kernel_selector` folder. -For each operation we usually have multiple kernels that can support different parameters and/or optimized for different scenarios. +For each operation, there are usually multiple kernels that can support different parameters and/or are optimized for different scenarios. Each operation has 3 major entities in kernel selector: - Operation specific `kernel_selector` instance - Operation parameters descriptor - Kernels itself with a set of heuristics inside for optimal selection - ## Kernel selector instance -For each operation we create kernel_selector class derived from `kernel_selector_base`. Basically, this class is needed to specify available kernels -for given operation. Each kernel selector is used as singleton. For example: +## Kernel selector instance +For each operation, you create a `kernel_selector` class derived from `kernel_selector_base`.
Basically, this class is needed to specify available kernels +for a given operation. Each kernel selector is used as a singleton. For example: ```cpp class mvn_kernel_selector : public kernel_selector_base { @@ -57,7 +57,7 @@ auto best_kernels = kernel_selector.GetBestKernels(mvn_params, mvn_optional_para ## Operation parameters -The parameters of operation for kernel_selector are defined in corresponding `${op_name}_params` class which is derived from `base_params`. For example: +The parameters of an operation for `kernel_selector` are defined in the corresponding `${op_name}_params` class, which is derived from `base_params`. For example: ```cpp struct mvn_params : public base_params { mvn_params() : base_params(KernelType::MVN) {} @@ -79,9 +79,9 @@ struct mvn_params : public base_params { }; ``` -The derived class should parameterize base class with specific `KernelType` and add operation-specific parameters. The only method that must be implemented -is `GetParamsKey()` which is used as a quick check for kernels applicability for current parameters, i.e. we take `ParamsKey` object calculated for input -operation parameters and `ParamsKey` object for each kernel, so we can compare them and discard the kernels that don't support current parameters. +The derived class should parameterize the base class with a specific `KernelType` and add operation-specific parameters. The only method that must be implemented +is `GetParamsKey()`, which is used as a quick check of kernel applicability for the current parameters. In other words, you take a `ParamsKey` object calculated for the input +operation parameters and a `ParamsKey` object for each kernel. Then, you can compare them and discard the kernels that do not support the current parameters. `ParamsKey` is implemented as a set of bit masks, so the applicability check is quite simple: ```cpp const ParamsKey implKey = some_implementation->GetSupportedKey(); @@ -97,15 +97,15 @@ if (!((implKey.mask & paramsKey.mask) == paramsKey.mask)) Each kernel must specify the following things: - Input parameters checks - - `GetSupportedKey()` method implementation which returns `ParamsKey` object for current implementation - - `Validate()` method that do more complex checks (optional) -- Dispatch data (global/local workgroup sizes, scheduling algorithm, etc) + - `GetSupportedKey()` method implementation, which returns a `ParamsKey` object for the current implementation. + - `Validate()` method, which does more complex checks (optional). +- Dispatch data (global/local workgroup sizes, scheduling algorithm, etc.) - Kernel name - must be passed to the base class c-tor - Kernel arguments specification - description of each argument in corresponding OpenCL™ kernel -- Additional JIT constants required for kernel - set of macro definitions that must be added to thi kernel template to make full specialization for given params -- Supported fused operations (if any) - a list of supported operations that can be fused into current kernel +- Additional JIT constants required for kernel - set of macro definitions that must be added to the kernel template to make full specialization for given params +- Supported fused operations (if any) - a list of supported operations that can be fused into the current kernel.
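+For instance, a `GetSupportedKey()` implementation typically looks like the following abbreviated sketch (based on existing reference kernels; the exact set of `Enable*` calls differs per kernel):
+
+```cpp
+ParamsKey MVNKernelRef::GetSupportedKey() const {
+    ParamsKey k;
+    // Data types this implementation can consume and produce.
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    // Layouts this implementation understands.
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableBatching();
+    return k;
+}
+```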
-Let's have a look at the key methods of each kernel implementation: +Key methods of each kernel implementation are as follows: ```cpp class MVNKernelRef : public MVNKernelBase { @@ -132,6 +132,7 @@ protected: ``` ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_memory_formats.md b/src/plugins/intel_gpu/docs/gpu_memory_formats.md index 891814ad59f3d4..bab66dec02c276 100644 --- a/src/plugins/intel_gpu/docs/gpu_memory_formats.md +++ b/src/plugins/intel_gpu/docs/gpu_memory_formats.md @@ -1,4 +1,4 @@ -# GPU memory formats +# GPU Memory Formats The memory format descriptor in GPU plugin usually uses the following letters: - `b` - batch @@ -8,9 +8,9 @@ The memory format descriptor in GPU plugin usually uses the following letters: - `o` - output channels (for weights layout only) - `g` - groups (for weights layout only) -The combination of the characters above defines tensor format, i.e. the actual layout of tensor values in memory buffer. For example: +The combination of the characters above defines the tensor format, that is, the actual layout of tensor values in a memory buffer. For example: `bfyx` format means that the tensor has 4 dimensions in planar layout and `x` coordinate changes faster than `y`, `y` - faster than `f`, and so on. -It means that for tensor with size `[b: 2; f: 2; y: 2; x: 2]` we have a linear memory buffer with `size=16` where: +It means that for a tensor with size `[b: 2; f: 2; y: 2; x: 2]`, there is a linear memory buffer with `size=16`, where: ``` i = 0 => [b=0; f=0; y=0; x=0]; i = 1 => [b=0; f=0; y=0; x=1]; @@ -37,19 +37,19 @@ i = 14 => [b=1; f=1; y=1; x=0]; i = 15 => [b=1; f=1; y=1; x=1]; ``` -Usually, planar memory formats are not very efficient for DNN operations, so GPU plugin has plenty *blocked* format. Blocking means that we take some tensor dimension -and put blocks of adjacent elements closer in memory (in the format with single blocking they are stored linearly in the memory). Consider the most widely used -blocked format in GPU plugin: `b_fs_yx_fsv16`. First of all, let's understand what these additional letters mean. We have `b`, `f`, `y`, `x` dimensions here, so -this is 4D tensor. +Usually, planar memory formats are not very efficient for DNN operations, so GPU plugin has plenty of *blocked* formats. Blocking means that you take some tensor dimension +and put blocks of adjacent elements closer in memory (in the format with a single blocking, they are stored linearly in the memory). Consider the most widely used +blocked format in GPU plugin: `b_fs_yx_fsv16`. First of all, let's understand what these additional letters mean. There are `b`, `f`, `y`, `x` dimensions here, so +this is a 4D tensor. `fs=CeilDiv(f, block_size)`; `fs` means `feature slice` - the blocked dimension. -The block size is specified in the format name: `fsv16` - `block_size = 16`, blocked dimension is `f`; `fsv` means `feature slice vector` +The block size is specified in the format name: `fsv16` - `block_size = 16`, a blocked dimension is `f`; `fsv` means `feature slice vector` Just like with any other layout, the coordinate of the rightmost dimension (`fsv`) is changed first, then coordinate to the left (`x`), and so on. -Note: if the original `f` dimension is not divisible by block size (16 in this case), then it's aligned up to the first divisible value. 
These pad values +> **Note**: If the original `f` dimension is not divisible by block size (`16` in this case), then it is aligned up to the first divisible value. These pad values are filled with zeroes. -Let's look at the changes with the tensor above if we reorder it into `b_fs_yx_fsv16` format: -1. Actual buffer size becomes `[b: 2; f: 16; y: 2; x: 2]`, and total size = 128 +When you reorder the tensor above into `b_fs_yx_fsv16` format, changes are as follows: +1. Actual buffer size becomes `[b: 2; f: 16; y: 2; x: 2]`, and total size equals 128. 2. The order of elements in memory changes: ``` // first batch @@ -106,6 +107,7 @@ i = 127 => [b=1; f=15; y=1; x=1] == [b=1; fs=0; y=1; x=1; fsv=15]; ``` All formats used by GPU plugin are specified in `src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp` file. Most of the formats there follow the notation above. ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md b/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md index 9f9485abbb8445..85e0cf033e550c 100644 --- a/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md +++ b/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md @@ -1,8 +1,8 @@ -# Driver issues troubleshooting +# Driver Issues Troubleshooting -If you see errors like "[CLDNN ERROR]. clGetPlatformIDs error -1001" when running OpenVINO samples / demos, then most likely you have some issues with OpenCL runtime on your machine. This document contains several hints on what to check and how to troubleshoot such kind of issues. +If you see errors like `[CLDNN ERROR]. clGetPlatformIDs error -1001` when running OpenVINO samples / demos, then most likely you have some issues with OpenCL runtime on your machine. This document contains several hints on what to check and how to troubleshoot such issues. -In order to make sure that OpenCL runtime is functional on your machine, you can use [clinfo](https://github.com/Oblomov/clinfo) tool. On many linux distributives it can be installed via package manager. If it's not available for your system, it can be easily built from sources. +To make sure that OpenCL runtime is functional on your machine, you can use the [clinfo](https://github.com/Oblomov/clinfo) tool. On many Linux distributions, it can be installed via the package manager. If it is not available for your system, it can be easily built from sources. Example of clinfo output: ``` @@ -23,26 +23,30 @@ Number of devices 1 Device Type GPU ``` ## 1. Make sure that you have GPU on your system + Some Intel® CPUs might not have integrated GPU, so if you want to run OpenVINO on iGPU, go to [ark.intel website](https://ark.intel.com/) and make sure that your CPU has it. ## 2. Make sure that OpenCL® Runtime is installed -On Windows OpenCL runtime is a part of the GPU driver, but on linux it should be installed separately. For the installation tips please refer to [OpenVINO docs](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_linux_header.html) and [OpenCL Compute Runtime docs](https://github.com/intel/compute-runtime/tree/master/opencl/doc). -To get support of Intel® Iris® Xe MAX Graphics with Linux please follow [driver installation guide](https://dgpu-docs.intel.com/devices/iris-xe-max-graphics/index.html) +OpenCL runtime is a part of the GPU driver on Windows, but on Linux it should be installed separately.
For the installation tips, refer to [OpenVINO docs](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_linux_header.html) and [OpenCL Compute Runtime docs](https://github.com/intel/compute-runtime/tree/master/opencl/doc). +To get the support of Intel® Iris® Xe MAX Graphics with Linux, follow the [driver installation guide](https://dgpu-docs.intel.com/devices/iris-xe-max-graphics/index.html) ## 3. Make sure that user has all required permissions to work with GPU device + Add the current Linux user to the `video` group: ``` sudo usermod -a -G video "$(whoami)" ``` ## 4. Make sure that iGPU is enabled + ``` $ cat /sys/devices/pci0000\:00/0000\:00\:02.0/enable 1 ``` -## 5. Make sure that "/etc/OpenCL/vendors/intel.icd" contain proper paths to the OpenCL driver +## 5. Make sure that "/etc/OpenCL/vendors/intel.icd" contains proper paths to the OpenCL driver + ``` $ cat /etc/OpenCL/vendors/intel.icd /usr/lib/x86_64-linux-gnu/intel-opencl/libigdrcl.so @@ -50,12 +54,15 @@ $ cat /etc/OpenCL/vendors/intel.icd Note: path to the runtime lib may vary in different driver versions ## 6. Use LD_DEBUG=libs to trace loaded libraries + For more details, see the [OpenCL on Linux](https://github.com/bashbaug/OpenCLPapers/blob/markdown/OpenCLOnLinux.md) ## 7. If you are using dGPU with XMX, ensure that HW_MATMUL feature is recognized -Openvino contains hello_query_device sample application: [link](https://docs.openvino.ai/latest/openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README.html) + +OpenVINO contains *hello_query_device* sample application: [link](https://docs.openvino.ai/latest/openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README.html) With this option, you can check whether Intel XMX(Xe Matrix Extension) feature is properly recognized or not. This is a hardware feature to accelerate matrix operations and available on some discrete GPUs. + ``` $ ./hello_query_device.py ... @@ -68,9 +75,9 @@ install them from [OpenCL Git](https://github.com/KhronosGroup/OpenCL-Headers) ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) * [OpenVINO GPU Plugin](../README.md) * [Developer documentation](../../../../docs/dev/index.md) - \ No newline at end of file diff --git a/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md b/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md index ae67f22b4ced94..01c202cd7a57bd 100644 --- a/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md +++ b/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md @@ -1,29 +1,29 @@ -# GPU plugin operations enabling flow +# GPU Plugin Operations Enabling Flow ## Terminology + * **NGraph operation**: Building block of neural networks, such as convolution or pooling. * **(clDNN) Primitive**: Basic NN operation that was defined in clDNN. One primitive is usually mapped to one ngraph operation, but graph compilation may cause the mapping not to be 1-to-1. -* **Kernel**: Actual body of execution in GPU. It also refers to specific implementations of **Primitive** for GPU, such as `convolution_gpu_winograd_2x3_s1.cl`. Usually, single kernel fulfills the operation of single primitive, but several kernels may be used to support one primitive. -* **Unittest**: Single-layer test within cldnn. +* **Kernel**: Actual body of execution in GPU. It also refers to specific implementations of **Primitive** for GPU, such as `convolution_gpu_winograd_2x3_s1.cl`. 
Usually, a single kernel fulfills the operation of a single primitive, but several kernels may be used to support one primitive. +* **Unittest**: Single-layer test within clDNN. * **Functional test**: Single-layer test in IE. -
- ## Adding new primitive + 1. Understand the new operation. * Review the [ngraph operation spec](https://github.com/openvinotoolkit/openvino/tree/master/docs/ops) * IE operations(a.k.a primitive or NN-layer) are defined by ngraph. * You can check ngraph reference implementation of the primitive as well - * e.g. [Scatter Elements Update in nGraph](https://github.com/openvinotoolkit/openvino/blob/master/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp) + * For example, [Scatter Elements Update in nGraph](https://github.com/openvinotoolkit/openvino/blob/master/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp) 1. Try to find existing primitive that fully or partially covers this operation. * It is also possible to transform the network so that the missing primitive is covered from existing primitive. - * e.g. [Replace reduce with pooling](https://github.com/openvinotoolkit/openvino/blob/23808f46f7b5d464fd649ad278f253eec12721b3/inference-engine/src/cldnn_engine/cldnn_engine.cpp#L205) + * For example, [replace reduce with pooling](https://github.com/openvinotoolkit/openvino/blob/23808f46f7b5d464fd649ad278f253eec12721b3/inference-engine/src/cldnn_engine/cldnn_engine.cpp#L205). + +1. Add new / extend existing clDNN primitive according to the operation spec. + 1. This phase is to enable primitive within clDNN library, without exposing it to IE. + 1. Implement **reference parallel kernel** that supports all parameters of the operation and all input/output data types and layouts. -1. Add new / extend existing cldnn primitive according to the operation spec. - 1. This phase is to enable primitive within cldnn library, without exposing it to IE. - 1. Implement **reference parallel kernel** that supports all parameters of the operation and all input/output data types and layouts - | File | Description | |------|-------------| | [scatter_elements_update_ref.cl](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/scatter_elements_update_ref.cl) | OpenCL Kernel body. 
For more detail, please see [How to write OCL kernel](#writing-ocl-kernel) section | @@ -31,18 +31,18 @@ | [scatter_elements_update_kernel_selector.(cpp,h)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_elements_update_kernel_selector.cpp) | Kernel selector for a primitive | | [register_gpu.(cpp,hpp)](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp) | Primitive registration | | [scatter_elements_update_gpu.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp) | Primitive registration, input spec | - | [scatter_elements_update_inst.h](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h) | Node type declaration for cldnn program | + | [scatter_elements_update_inst.h](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h) | Node type declaration for clDNN program | | [clDNN/src/scatter_elements_update.cpp](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp) | Code for scatter_elements_update_inst.h | | [clDNN/api/cldnn/primitives/scatter_elements_update.hpp](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/primitives/scatter_elements_update.hpp) | clDNN primitive definition | | [common_types.h](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/kernel_selector/common_types.h) | Enum declaration for KernelType and arguments | - 1. Add unit tests for the new operation + 1. Add unit tests for the new operation. | File | Description | |------|-------------| | [scatter_elements_update_gpu_test.cpp](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp) | Unittest for layer | - * Need to add reference code or expected result for checking the result. + * You need to add reference code or expected result for checking the result. * You can also specify the kernel with `force_implementations` in case the primitive contains multiple kernels. ``` @@ -54,31 +54,31 @@ ... ``` - * This unit test is built into `clDNN_unit_tests`. It is a gtest application. + * This unit test is built into `clDNN_unit_tests`. It is a `gtest` application. ``` # Show list of test cases openvino/bin/intel64/Debug$ ./clDNN_unit_tests64 --gtest_list_tests # Run test openvino/bin/intel64/Debug$ ./clDNN_unit_tests64 --gtest_filter=scatter_elements_update_gpu_fp16.* ``` - - * Test scope needs to be comprehensive, but not wasteful. These tests run for every PRs in CI. Let's save the planet. - + + * Test scope needs to be comprehensive, but not wasteful. These tests run for every PR in CI. Let's save the planet. + 1. Support layer fusion, if applicable - * It is usually easy to fuse some layers, such as scale, activation, quantize and eltwise, into previous layer. This fusing rule can be added to `prepare_primitive_fusing::fuse_simple_primitives`. + * It is usually easy to fuse some layers, such as *scale*, *activation*, *quantize*, and *eltwise*, into the previous layer. This fusing rule can be added to `prepare_primitive_fusing::fuse_simple_primitives`. 
* `fuse_simple_primitives` is called during [graph compilation phase](https://github.com/openvinotoolkit/openvino/blob/71c50c224964bf8c24378d16f015d74e2c1e1ce8/inference-engine/thirdparty/clDNN/src/program.cpp#L430) - * You can see general description of layer fusion [here](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_CL_DNN.html#optimizations) + * See general description of layer fusion [here](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_CL_DNN.html#optimizations) * Unit tests for layer fusion are placed in a single file: [fusings_gpu_test.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp). It is also compiled into `clDNN_unit_tests`. * Code for fused layers are generated with `jitter`. It is created as `FUSED_OPS..` macro in OCL code. This generation logic is in `KernelBase::MakeFusedOpsJitConstants`. -1. Add / update factory for this operation in the GPU plugin to use new primitive in inference-engine +1. Add / update factory for this operation in the GPU plugin to use new primitive in inference-engine. | File | Description | |------|-------------| - | [cldnn_engine/ops/scatter_elements_update.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp) | Instantiation from cldnn plugin for IE | + | [cldnn_engine/ops/scatter_elements_update.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp) | Instantiation from clDNN plugin for IE | | [cldnn_primitives_list.hpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp) | Registration for primitives | -1. Add functional single layer tests for the operation and try to cover most of the difference use cases of this operation +1. Add functional single-layer tests for the operation and try to cover most of the different use cases of this operation. | File | Description | |------|-------------| @@ -86,32 +86,31 @@ * It is possible to use ngraph reference code for result validation. * This is compiled into `gpuFuncTests`. It is also `gtest` application. - * Please also review the [general guideline of test infrastructure](https://github.com/openvinotoolkit/openvino/wiki/InferenceEngineTestsInfrastructure) + * Also, review the [general guideline of test infrastructure](https://github.com/openvinotoolkit/openvino/blob/master/docs/IE_PLUGIN_DG/PluginTesting.md). -1. [Optional] If there are existing IRs with this operation, try to run the full model(s) to be sure that it's correctly processed within the context +1. [Optional] If there are existing IRs with this operation, try to run the full model(s) to be sure that it is correctly processed within the context. -1. [Optional] If there are existing IRs with this operation, try to run the full model(s) and estimate performance impact from this operation on total model execution time +1. [Optional] If there are existing IRs with this operation, try to run the full model(s) and estimate performance impact from this operation on total model execution time. -1. Create PR with your changes +1. Create a PR with your changes. * If you are `OpenVINO` group member in github, CI will be triggered. - * Please review the [OpenVINO contribution guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md). - -
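+For reference, pinning a specific kernel in such a unit test (see step 3 above and the next section) typically looks like the following hedged sketch (the primitive id `"mvn1"` and the kernel name are illustrative, and the exact option types may differ between versions):
+
+```cpp
+// Hypothetical example: force the "mvn1" primitive to use a particular kernel.
+build_options bo;
+bo.set_option(build_option::optimize_data(true));
+implementation_desc mvn_impl = { format::bfyx, "mvn_gpu_ref" };
+bo.set_option(build_option::force_implementations({ {"mvn1", mvn_impl} }));
+network net(engine, topology, bo);
+```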
+ * Review the [OpenVINO contribution guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md). ## Adding new kernel for an existing primitive -* The process is quite similar to previous one. You can skip already existing steps. -* Main work is adding new kernel and registering it from kernel selector. -* You may need to add unit test for that new kernel. Specific kernel can be chosen with `build_option::force_implementations`. -* It is not possible to specify kernel from functional test(IE). -
+* The process is quite similar to the previous one. You can skip already existing steps. +* Main work is adding a new kernel and registering it from the kernel selector. +* You may need to add a unit test for that new kernel. A specific kernel can be chosen with `build_option::force_implementations`. +* It is not possible to specify a kernel from a functional test(IE). ## Writing OCL kernel ### Jitter -In GPU OCL kernels, many conditional statements are processed with `#ifdef` so that it can be handled during compile-time. The definitions are created with `jitter.cpp`. It is set during graph compilation. You can see generated macros following the steps in [source dumps](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#sources-dumps). +In GPU OCL kernels, many conditional statements are processed with `#ifdef` so that they can be handled at compile time. The definitions are created with `jitter.cpp`. It is set during graph compilation. You can see the generated macros by following the steps in [source dumps](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#sources-dumps). + Jitter also contains run-time parameters such as input and output size. -Additional macros can be defined from host-code of kernel itself. For example, see below code snippet. It passes `SUB_GROUP_SIZE` through macro definition through jitter. +Additional macros can be defined from the host-code of a kernel itself. For example, see the code snippet below. It passes `SUB_GROUP_SIZE` as a macro definition through jitter. ``` // GetJitConstants method of the kernel const size_t sub_group_size = 16; @@ -120,17 +119,22 @@ Additional macros can be defined from host-code of kernel itself. For example, s ``` ### Accessing input and output tensor -Jitter generates macros for index calculations. With these macros, you can program ocl kernel in a layout-agnostic way. If you use the macro `${TENSOR_NAME}_GET_INDEX`, you can get 1d-index from tensor coordinate whether the format is planar(such as `bfyx` or `byxf`) or blocked.(such as `b_fs_yx_fsv16`). You can check [source code for GET_INDEX macro](https://github.com/openvinotoolkit/openvino/blob/7f8d3aa63899a3e3362c95eb7d1b04a5899660bd/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp#L313). + +Jitter generates macros for index calculations. With these macros, you can program an OCL kernel in a layout-agnostic way. If you use the macro `${TENSOR_NAME}_GET_INDEX`, you can get the 1D index from a tensor coordinate whether the format is planar (such as `bfyx` or `byxf`) or blocked (such as `b_fs_yx_fsv16`). You can check [source code for GET_INDEX macro](https://github.com/openvinotoolkit/openvino/blob/7f8d3aa63899a3e3362c95eb7d1b04a5899660bd/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp#L313). ### Layout support + If a kernel is not performance-critical, you can support `bfyx`, `bfzyx` and `bfwzyx` only for layout. Those are default layouts. As an optimized format, `b_fs_yx_fsv16`, `b_fs_yx_fsv4` or `byxf` can be used as well.
-[General description of layout can be found here](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_memory_formats.md) and [header file is here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/api/tensor.hpp) + +[General description of layout can be found here](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_memory_formats.md) and [header file is here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/api/tensor.hpp). ### Layer fusion + When layers are fused, `jitter` will create macros to generate code for fused layers. It is realized into `FUSED_OPS..` in OCL kernel. You can understand the usage from other kernels. There is a [comment that describes layer fusion](https://github.com/openvinotoolkit/openvino/blob/7f8d3aa63899a3e3362c95eb7d1b04a5899660bd/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h#L521). ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md b/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md index 87632f28d8e868..134f96bc258391 100644 --- a/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md +++ b/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md @@ -1,14 +1,14 @@ -# GPU plugin unit test +# GPU Plugin Unit Test -GPU plugin has two type tests: first one is functional tests and second one is unit tests. +GPU plugin has two types of tests: functional and unit tests. This article is about the latter. -- The functional test is testing single layer, behavior, sub graph and low precision transformation on inference engine level for various layout and data types such as fp16 and fp32. -- The unit test is testing cldnn primitive and core type modules on GPU plugin level. Unlike functional test, it is possible to test by explicitly specifying the format of the input such as `bfyx` or `b_fs_yx_fsv16`. This documentation is about this type of test. +- The functional test is testing a single layer, behavior, subgraph and low-precision transformation on inference engine level for various layout and data types, such as FP16 and FP32. +- The unit test is testing clDNN primitive and core-type modules on GPU plugin level. Unlike the functional test, it is possible to test by explicitly specifying the format of the input, such as `bfyx` or `b_fs_yx_fsv16`. -# Structure of unit test +# Structure of a unit test -Intel GPU unit test (aka clDNN unit test) is a set of unit tests each of which is for testing all primitives, fusions and fundamental core types of GPU plugin. -There are 4 sub categories of unit tests as below. +Intel GPU unit test (aka clDNN unit test) is a set of unit tests, each of which is for testing all primitives, fusions, and fundamental core types of GPU plugin. +There are four subcategories of unit tests as below. ```bash openvino/src/plugins/intel_gpu/tests - root of Intel GPU unit test @@ -19,42 +19,45 @@ openvino/src/plugins/intel_gpu/tests - root of Intel GPU unit test ``` - ### fusions - - Fusion is an algorithm that fuse several operations into one optimized operation. For example, two nodes of `conv -> relu` may be fused into single node of `conv`. + + - Fusion is an algorithm that fuses several operations into one optimized operation. For example, two nodes of `conv -> relu` may be fused into a single node of `conv`. 
- Fusion unit tests check whether the fusion is done as expected.
 - fusion_test_common.cpp
-   - The base class for fusing test, i.e., [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19), is implemented here. It tests whether the fusing is successful or not by comparing the execution results of the two networks, one is the fused network, the other is non fused network for same topology.
-   - [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19) has an important method called *`compare()`*.
-   - *`compare()`* method has the following three tasks
+   - The base class for a fusing test, that is, [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19), is implemented here. It tests whether the fusing is successful or not by comparing the execution results of two networks: one is the fused network, the other is the non-fused network for the same topology.
+   - [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19) has an important method called `compare()`.
+   - The `compare()` method performs the following three tasks:
     - Execute two networks (the fused network and the non-fused network)
-     - Compare the actual number of executed primitives with the expected number of executed primitives in test params
+     - Compare the actual number of executed primitives with the expected number of executed primitives in test params
     - Compare the results between the fused and non-fused networks
 - eltwise_fusing_test.cpp
-   - Check whether or not eltwise is fused to other primitives as expected
+   - Checks whether or not *eltwise* is fused into other primitives as expected
 - [primitive_name]_fusion_test.cpp
-   - Check that nodes such as eltwise or activation are fusing to the [primitive_name] as expected
+   - Checks that nodes such as *eltwise* or *activation* are fused into the [primitive_name] as expected
 - The details of how to add each instance are described [below](#fusions-1).
- ### test_cases
-  - It is mainly checking that cldnn primitives and topology creation are working as designed
-  - It also checks configurations for OpenCL functionalities such as cl_cache, cl_mem allocation and cl_command_queue modes
-- ### module_tests
-  - Unit tests for fundamental core modules such as ocl_user_events, format, layout, and usm memory
-  - Check ocl_user_event is working as expected
-  - Check all format is converted to the string and trait
-  - Check various layouts are created as expected
-  - Check usm_host and usm device memory buffer creation and read/write functionality
+  - It mainly checks whether clDNN primitives and topology creation work as designed.
+  - It also checks configurations for OpenCL functionalities such as *cl_cache*, *cl_mem allocation* and *cl_command_queue* modes
+
+- ### module_tests
+
+  - Unit tests for fundamental core modules such as `ocl_user_events`, format, layout, and USM memory:
+    - check whether `ocl_user_event` works as expected,
+    - check whether all formats are converted to the string and trait representations,
+    - check whether various layouts are created as expected,
+    - check `usm_host` and USM device memory buffer creation and read/write functionality.
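+For orientation, a minimal test_cases-style test might look like the sketch below. This is illustrative only: the primitive and helper signatures are simplified, so check the existing tests under `test_cases/` for the exact, current API.
+```c++
+// A sketch of a typical clDNN unit test: build a topology, run the network,
+// and compare the output against manually computed expected values.
+TEST(activation_gpu_f32, basic_relu_sketch) {
+    auto& engine = get_test_engine();  // helper provided by test_utils
+
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
+    set_values(input, { -1.f, 2.f, -3.f, 4.f });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(activation("relu", "input", activation_func::relu));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    // Expected ReLU results { 0, 2, 0, 4 } are computed by hand from the input above;
+    // read the output memory back with the usual test_utils accessors and compare.
+}
+```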
- ### test_utils
-  - Defined base functions of unit test such as *`get_test_engine()`* which returns `cldnn::engine`
-  - Utility functions such as Float16, random_gen and uniform_quantized_real_distribution
+  - Defines base functions of a unit test, such as `get_test_engine()`, which returns `cldnn::engine`
+  - Utility functions, such as `Float16`, `random_gen` and `uniform_quantized_real_distribution`
 
 # How to run unit tests
 
 ## Build unit test
-1. Turn on `ENABLE_TESTS` and `ENABLE_CLDNN_TESTS` in cmake option
+1. Turn on `ENABLE_TESTS` and `ENABLE_CLDNN_TESTS` in the cmake options:
 
```bash
cmake -DCMAKE_BUILD_TYPE=Release \
@@ -69,21 +72,19 @@ openvino/src/plugins/intel_gpu/tests - root of Intel GPU unit test
make clDNN_unit_tests
```
 
-3. You can find _`clDNN_unit_tests64`_ in bin directory after build
-
-
+3. You can find `clDNN_unit_tests64` in the *bin* directory after the build
 
 ## Run unit test
 
-You can run _`clDNN_unit_tests64`_ in bin directory which is the output of openvino build
+You can run `clDNN_unit_tests64` in the *bin* directory, which is the output of the OpenVINO build.
 
-If you want to run specific unit test, you can use gtest_filter option as follows:
+If you want to run a specific unit test, you can use the `gtest_filter` option as follows:
 
```
./clDNN_unit_tests64 --gtest_filter='*filter_name*'
```
 
-Then, you can get the result like this
+Then, you will get a result similar to:
 
```bash
openvino/bin/intel64/Release$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD
@@ -101,34 +102,33 @@ Note: Google Test filter = *fusings_gpu/conv_fp32_reorder_fsv16_to_bfyx.basic/0*
[  PASSED  ] 1 test.
```
 
-
 # How to create new test case
 
 ## TEST and TEST_P (GoogleTest macros)
 
-GPU unit tests are using 2 types of test macros(**TEST** and **TEST_P**) in [GoogleTest (aka gtest)](https://google.github.io/googletest/)
+GPU unit tests use two types of test macros (**TEST** and **TEST_P**) in [GoogleTest (aka gtest)](https://google.github.io/googletest/)
 
- ### **TEST**
-  - **TEST** is the simple test case macro.
-  - To make test-case using **TEST**, define an individual test named *`TestName`* in the test suite *`TestSuiteName`*
+  - **TEST** is a simple test case macro.
+  - To make a test case using **TEST**, define an individual test named `TestName` in the test suite `TestSuiteName`:
 
```
TEST(TestSuiteName, TestName) {
  ... test body ...
}
```
-  - The test body can be any code under test. To determine the outcomes within the test body, use assertion such as *`EXPECT_EQ`* and *`ASSERT_NE`*.
-
+  - The test body can be any code under test. To determine the outcome within the test body, use assertions, such as `EXPECT_EQ` and `ASSERT_NE`.
+
- ### **TEST_P**
-  - **TEST_P** is used to set test case using test parameter sets
-  - To make test-case using **TEST_P**, define an individual value-parameterized test named *`TestName`* that uses the test fixture class *`TestFixtureName`* which is the test suite name
+  - **TEST_P** is used to define a test case that uses test parameter sets
+  - To make a test case using **TEST_P**, define an individual value-parameterized test named `TestName` that uses the test fixture class `TestFixtureName`, which is the test suite name:
```
TEST_P(TestFixtureName, TestName) {
  ... statements ...
}
```
-  - Then, instantiates the value-parameterized test suite *`TestSuiteName`* which is defined defined with **TEST_P**
+  - Then, instantiate the value-parameterized test suite `TestSuiteName`, which is defined with **TEST_P**:
```c++
INSTANTIATE_TEST_SUITE_P(InstantiationName,TestSuiteName,param_generator)
```
@@ -136,29 +136,28 @@ GPU unit tests are using 2 types of test macros(**TEST** and **TEST_P**) in [G
 
 ## module_test and test_cases
 
-- module_test and test_cases are testing GPU plugin using both **TEST_P** and **TEST**.
-- Please refer to [the fusion test](#fusions-1) for the test case based on **TEST_P**
+- *module_test* and *test_cases* test the GPU plugin using both **TEST_P** and **TEST**.
+- Refer to [the fusion test](#fusions-1) for the test case based on **TEST_P**
- **TEST** checks the test result by comparing the execution results with expected values after running a network created from the target topology.
- It is important to generate the test input and the expected output in **TEST**
-  - You can create input data and expected output data using the 3 following ways:
-    - Generate simple input data and calculate the expected output data from input data manually like [basic_deformable_convolution_def_group1_2](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp#L254)
-    - Generate random input and get the expected output using reference function which is made in the test codes like [mvn_test_across_channels_outside_sqrt_bfyx](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L108)
-    - Generate random input and get the expected output from another reference kernel which is existed in cldnn kernels like [mvn_random_test_bsv32](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L793)
+  - You can create input data and expected output data using these three approaches:
+    - Generate simple input data and calculate the expected output data from the input data manually, like [basic_deformable_convolution_def_group1_2](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp#L254)
+    - Generate random input and get the expected output, using a reference function implemented in the test code, like [mvn_test_across_channels_outside_sqrt_bfyx](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L108)
+    - Generate random input and get the expected output from another reference kernel that exists in clDNN kernels, like [mvn_random_test_bsv32](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L793)
 
-- When you allocate input data, please keep in mind that the layout order in *`engine.allocation_memory`* is not *`bfyx`* but *`bfxy`*. i.e., example, if input is {1,1,4,5}, the layout should be below
+- When you allocate input data, keep in mind that the layout order in `engine.allocate_memory` is not `bfyx` but `bfxy`. For example, if the input is `{1,1,4,5}`, the layout should be as below:
 
```c++
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } });
```
 
- ## fusions
 
-- It is implemented based on **TEST_P** because there are many cases where multiple layouts are tested in the same topology
-- If the fusing test class is already existed, you can use it.
 otherwise, you should make new fusing test class which is inherited [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19)
-  - The new fusing test class should create `execute()` method which creates fused / non fused networks and calls *`compare`* method after setting input
-- Create test case using **TEST_P**
-  - You can make the desired networks using create_topologies.
+- It is implemented based on **TEST_P** because there are many cases where multiple layouts are tested in the same topology.
+- If the fusing test class already exists, you can use it. Otherwise, you should make a new fusing test class, which inherits from [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19).
+  - The new fusing test class should implement the `execute()` method, which creates the fused / non-fused networks and calls the `compare` method after setting the input.
+- Create a test case, using **TEST_P**:
+  - You can make the desired networks using `create_topologies`.
```mermaid
flowchart LR
    nodeA1(bias) --> nodeA2(conv_prim)
@@ -186,7 +185,7 @@ class nodeA3 moss1
class nodeA8 steel1
class nodeA4,nodeA1,nodeA6,nodeA9,nodeA11 carbon1
```
-  - For example, if you design the networks like the one above, you can make the test code as follow
+  - For example, if you design the networks like the one above, you can make the test code as follows:
 
```c++
class conv_fp32_multi_eltwise_4_clamp : public ConvFusingTest {};
@@ -218,12 +217,12 @@
```
 
-  - If you want to change some node's layout format to specific format, you can change it using *`build_option::force_implementations`*.
-    - In the sample codes, *`conv_prim`* is set to *`format::b_fs_yx_fsv16`* by *`build_option::force_implementations`*
-- *`tolerance`* is used as to threshold to check whether or not output result are same between fused network and non fused network in *`compare`* function.
-- After the test case is implemented, use `INSTANTIATE_TEST_SUITE_P` to set the test suite for each parameter case as follows.
-  - Check all variables in *`convolution_test_params`* to make `CASE_CONV_FP32_2`.
-  - In *`convolution_test_params`*, all tensor, format, and data_types are used in common in all convolution fusing tests. So you can define `CASE_CONV_FP32_2` with all variables except *`expected_fused_primitives`* and *`expected_not_fused_primitives`*
+  - If you want to change some node's layout format to a specific format, you can change it using `build_option::force_implementations`.
+    - In the sample code, `conv_prim` is set to `format::b_fs_yx_fsv16` by `build_option::force_implementations`.
+- `tolerance` is used as a threshold to check whether or not the output results are the same between a fused network and a non-fused network in the `compare` function.
+- After the test case is implemented, use `INSTANTIATE_TEST_SUITE_P` to set the test suite for each parameter case as follows.
+  - Check all variables in `convolution_test_params` to make `CASE_CONV_FP32_2`.
+  - In `convolution_test_params`, all tensor, format, and `data_types` are used in common in all convolution fusing tests. Therefore, you can define `CASE_CONV_FP32_2` with all variables except `expected_fused_primitives` and `expected_not_fused_primitives`.
```c++ struct convolution_test_params { @@ -256,6 +255,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_scale, ::testing::ValuesIn(std:: ``` ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/graph_optimization_passes.md b/src/plugins/intel_gpu/docs/graph_optimization_passes.md index 5a96e74a80c3c5..d30260ca7dc389 100644 --- a/src/plugins/intel_gpu/docs/graph_optimization_passes.md +++ b/src/plugins/intel_gpu/docs/graph_optimization_passes.md @@ -1,25 +1,26 @@ # Graph Optimization Passes -Graph optimization is a collection of optimization passes that happens to convert a general network description into a network-description-for-GPU-execution. It happens in the constructor of `cldnn::program`. In other words, the input of graph optimization is `topology`[(link)](./basic_data_structures.md#topology) and output is `program`[(link)](./basic_data_structures.md#program-impl--). +Graph optimization is a collection of optimization passes that convert a general network description into a network-description-for-GPU-execution. It happens in the constructor of `cldnn::program`. In other words, the input of graph optimization is `topology` [(link)](./basic_data_structures.md#topology) and the output is `program` [(link)](./basic_data_structures.md#program-impl--). -The transformation from original graph into the final graph is quite complicated. The steps are divided into smaller pieces(`pass`). The purpose of this documentation is not to explain every step in detail, but to explain key steps. +The transformation from the original graph into the final graph is quite complicated. The steps are divided into smaller pieces (`pass`). The purpose of this documentation is not to explain every step in detail, but to explain key steps. -For debugging purpose, you can dump the optimized graph after each step. Please see this [link](./gpu_debug_utils.md#graph-dumps) for detail. +For debugging purposes, you can dump the optimized graph after each step. See this [article](./gpu_debug_utils.md#graph-dumps) for details. -Note: The optimization passes runs in sequence and the prefixed number indicates the sequence. However, this sequence number might change in the future. +> **Note**: The optimization passes run in sequence and the prefixed number indicates the sequence. However, the sequence number might change in the future. -* **00_init**: First step of the optimization. If you want to see first cldnn graph, you can check this. It collects network output node information and set node processing order. -* **08_prepare_primitive_fusing**: Fuse post-operations into other primitives. For example, relu is fused into convolution. Element-wise add operation can usually be fused into predecessor, too. The layout for the primitive is not chosen at this point yet, and we don't know which kernel will be chosen for the primitive. However, support for post-operation is dependent on the chosen kernel. That is why this pass contains some logic to guess the layout. -* **09_reorder_inputs**: Select layout format for each primitives. This is done by calling `layout_optimizer::get_preferred_format` function which returns preferred format for a node(or “any” which means that format must be propagated from adjacent nodes if possible). Then it propagate formats for nodes with “any” preferred format to minimize local reorders. 
After propagating formats, it inserts actual reorders nodes into the graph. As a result of this pass, we get quite complicated graph with many _redundant_ reorders. It will be removed from `remove_redundant_reorders`. -* **17_remove_redundant_reorders**: This pass is about removing reorder, but it has two conceptual purpose. First one is removing _redundant_ reorders. For example, when the network contains a pattern like `reorder - reorder - reorder`, it can be shrunk into single `reorder`. Second one is about supporting cross-layout operation of primitive. For example, when a `convolution` needs to receive `bfyx` input and to generate `b_fs_yx_fsv16` output, the initial graph from `reorder_inputs` looks like this: `data(bfyx) --> reorder(b_fs_yx_fsv16) --> convolution(b_fs_yx_fsv16)`. This pass looks for such pattern and removes the reorder to generate cross-layout graph for the target convolution: `data(bfyx) --> convolution(b_fs_yx_fsv16)` -* **19_prepare_buffer_fusing**: This pass is for implicit concat or implicit crop. Implicit concat is about removing `concatenation` primitive when two predecessors can put result into the target buffer of concat directly. For example, if two convolution results are concatenated along f-axis and the layout is bfyx format and b=1, we can just remove concat primitive and manipulate the output address of the convolutions to point proper locations. -* **20_add_required_reorders**: This pass tries to keep graph consistency and add reorder if current format is not supported by a node. It checks if current input format is present in `implementation_map` defined in `_gpu.cpp` file. If it is not defined, this pass tries to change layout to one of the most common format [bfyx, yxfb, byxf] and picks the first supported format. -* **21_add_onednn_optimization_attributes**: This pass generates onednn attributes for post operation[(link)](https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#post-ops-and-attributes). OpenVINO gpu plugin(a.k.a. cldnn) has a set of defined post operations and it requires some transformation to map those into onednn post-operations. -* **22_compile_graph**: This pass creates `primitive_impl` through kernel selector. In this pass, the kernel for each node is chosen. For onednn primitives, OpenCL code is compiled in this stage. For cldnn primitives, OpenCL code will be compiled after all passes. -* **26_propagate_constants**: This pass reorders weights for convolution, deconvolution and FC to a required format. As kernel is chosen in `compile_graph` stage, it is now known that some reordering is required for weights. It is because the weights are stored in a simple planar format in IR, but other format is usually required for optimized convolution(or deconv, FC). In order to reorder weights, this pass creates a simple graph that receives weights and generates reordered weights. We get the reordered weights by executing the network and the reordered weights are inserted back into the original graph. -* **31_oooq_memory_dependencies**: In GPU, device memory is a limited resource and it is not necessary to keep all the intermediate results when inferencing a network. Therefore, the buffer is reused when the content is not needed anymore. However, it is necessary to take it into consideration that intel_gpu plugin is using out-of-order queue. As we are not sure the exact sequence of execution, there is additional limitation of reusing buffer. 
 For example, in case of multi-branch structure like inception, there is no direct dependencies between the branches except for the common ancestor. However, in OOOQ execution mode, as we are not sure the sequence of execution in inception module, it is necessary not to reuse the buffer from one branch by another branch. Such _implicit dependency_ information is processed in this pass.
+* **00_init**: First step of the optimization. If you want to see the first clDNN graph, you can check this. It collects network output node information and sets node processing order.
+* **08_prepare_primitive_fusing**: Fuses post-operations into other primitives. For example, *ReLU* is fused into convolution. An element-wise *add* operation can usually be fused into its predecessor, too. The layout for the primitive is not chosen at this point yet, and you do not know which kernel will be chosen for the primitive. However, support for post-operations depends on the chosen kernel. That is why this pass contains some logic to guess the layout.
+* **09_reorder_inputs**: Selects the layout format for each primitive. This is done by calling the `layout_optimizer::get_preferred_format` function, which returns the preferred format for a node (or “any”, which means that the format must be propagated from adjacent nodes if possible). Then it propagates formats for nodes with the “any” preferred format to minimize local reorders. After propagating formats, it inserts actual reorder nodes into the graph. The result of this pass is a quite complicated graph with many _redundant_ reorders, which will be removed by `remove_redundant_reorders`.
+* **17_remove_redundant_reorders**: This pass is about removing reorders, but it has two conceptual purposes. The first one is removing _redundant_ reorders. For example, when the network contains a pattern like `reorder - reorder - reorder`, it can be shrunk into a single `reorder`. The second one is about supporting cross-layout operation of a primitive. For example, when a `convolution` needs to receive `bfyx` input and to generate `b_fs_yx_fsv16` output, the initial graph from `reorder_inputs` looks as follows: `data(bfyx) --> reorder(b_fs_yx_fsv16) --> convolution(b_fs_yx_fsv16)`. This pass looks for such a pattern and removes the reorder to generate a cross-layout graph for the target convolution: `data(bfyx) --> convolution(b_fs_yx_fsv16)`
+* **19_prepare_buffer_fusing**: This pass is for implicit concat or implicit crop. Implicit concat is about removing the `concatenation` primitive when two predecessors can put their results into the target buffer of concat directly. For example, if two convolution results are concatenated along the f-axis and the layout is `bfyx` format and `b=1`, you can just remove the concat primitive and manipulate the output address of the convolutions to point to proper locations.
+* **20_add_required_reorders**: This pass tries to keep graph consistency and adds a reorder if the current format is not supported by a node. It checks if the current input format is present in `implementation_map`, defined in the `_gpu.cpp` file. If it is not defined, this pass tries to change the layout to one of the most common formats `[bfyx, yxfb, byxf]` and picks the first supported one.
+* **21_add_onednn_optimization_attributes**: This pass generates oneDNN attributes for post operations [(link)](https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#post-ops-and-attributes).
 OpenVINO GPU plugin (clDNN) has a set of defined post operations and it requires some transformation to map those into oneDNN post-operations.
+* **22_compile_graph**: This pass creates `primitive_impl` through the kernel selector. In this pass, the kernel for each node is chosen. For oneDNN primitives, OpenCL code is compiled in this stage. For clDNN primitives, OpenCL code will be compiled after all passes.
+* **26_propagate_constants**: This pass reorders weights for convolution, deconvolution and FC to a required format. As the kernel is chosen in `compile_graph` stage, it is now known that some reordering is required for the weights. It is because the weights are stored in a simple planar format in IR, but another format is usually required for optimized convolution (or deconv, FC). To reorder weights, this pass creates a simple graph that receives weights and generates reordered weights. You get the reordered weights by executing the network and the reordered weights are inserted back into the original graph.
+* **31_oooq_memory_dependencies**: In GPU, device memory is a limited resource and it is not necessary to keep all the intermediate results when inferencing a network. Therefore, the buffer is reused when the content is not needed anymore. However, it is necessary to take into consideration that the `intel_gpu` plugin uses an out-of-order queue. As you are not sure about the exact sequence of execution, there is an additional limitation on reusing the buffer. For example, in case of a multi-branch structure like inception, there are no direct dependencies between the branches except for the common ancestor. However, in OOOQ execution mode, as you are not sure about the sequence of execution in the inception module, it is necessary not to reuse the buffer from one branch by another branch. Such _implicit dependency_ information is processed in this pass.
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md b/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md
index aa1d54d3733004..eb548eb386abe4 100644
--- a/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md
+++ b/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md
@@ -1,23 +1,26 @@
-# Memory allocation in GPU plugin
+# Memory Allocation in GPU Plugin
 
 ## Allocation types
-GPU plugin supports 4 types of memory allocation as below. Note that the prefix `usm_` indicates the allocation type using Intel Unified Shared Memory (USM) extension for OpenCL. For more detailed information about the USM extension, refer to [this](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_unified_shared_memory.html) page.
-* `cl_mem` : Standard OpenCL cl_mem allocation
-* `usm_host` : Allocated in host memory and accessible by both of host and device. Not migratable.
+
+GPU plugin supports four types of memory allocation, as described below. Note that the prefix `usm_` indicates the allocation type using the Intel Unified Shared Memory (USM) extension for OpenCL. For more detailed information about the USM extension, refer to [this](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_unified_shared_memory.html) page.
+* `cl_mem` : Standard OpenCL `cl_mem` allocation.
+* `usm_host` : Allocated in host memory and accessible by both host and device. Non-migratable.
* `usm_shared` : Allocated in both host and device memory, and accessible by both host and device.
 The memories are automatically migrated on demand.
-* `usm_device` : Allocated in device memory and accessible only by the device which owns the memory. Not migratable.
+* `usm_device` : Allocated in device memory and accessible only by the device which owns the memory. Non-migratable.
 
 Note that there are a few restrictions on memory allocation:
-* Allocation of single memory object should not exceed the available device memory size, i.e., the value obtained by `CL_DEVICE_GLOBAL_MEM_SIZE`.
-* The sum of all memory objects required to execute a kernel (i.e., the sum of inputs and outputs of a kernel) should not exceed the target available memory. For example, if you want to allocate a memory object to the device memory, the above restrictions should be satisfied against the device memory. Otherwise, the memory object should be allocated on the host memory.
+* Allocation of a single memory object should not exceed the available device memory size, that is, the value obtained by `CL_DEVICE_GLOBAL_MEM_SIZE`.
+* The sum of all memory objects required to execute a kernel (that is, the sum of inputs and outputs of a kernel) should not exceed the target available memory. For example, if you want to allocate a memory object to the device memory, the above restrictions should be satisfied against the device memory. Otherwise, the memory object should be allocated on the host memory.
 
 ## Memory allocation API
+
 In the GPU plugin, the allocation for each allocation type can be done with [engine::allocate_memory](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp#L51), which
-calls the corresponding memory object wrapper for each allocation type: [gpu_buffer](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L35), [gpu_usm](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L291).
+calls the corresponding memory object wrapper for each allocation type: [gpu_buffer](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L35) or [gpu_usm](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L291).
+
+## Dump memory allocation history
 
-## Dump memory allocation history
-The memory allocation history is being managed by the `engine`, which can be dumped by setting the environment variable `OV_GPU_Verbose=1` if the OpenVino is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`.
+The memory allocation history is managed by the `engine` and can be dumped by setting the environment variable `OV_GPU_Verbose=1` if OpenVINO is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`.
```cpp
...
GPU_Debug: Allocate 58982400 bytes of usm_host allocation type (current=117969612; max=117969612)
@@ -26,26 +29,28 @@ GPU_Debug: Allocate 44236800 bytes of usm_host allocation type (current=16220641
GPU_Debug: Allocate 14873856 bytes of usm_device allocation type (current=59500236; max=59500236)
...
```
-Here, `current` denotes the total allocated memory amount at that moment while `max` denotes the peak record of the total memory allocation until that moment.
+Here, `current` denotes the amount of total allocated memory at that moment, while `max` denotes the peak record of the total memory allocation until that moment.
 
 ## Allocated memory objects
-The typical memory allocation performed in the GPU plugin can be categorized as follows:
-* `Constant memory allocation`: In GPU plugin, constant data are held by the `data` primitives and the required memory objects are [allocated](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/plugin/ops/constant.cpp#L181) and assigned at the creation of the data primitive. First, it is allocated on the host memory and the constant data are copied from the corresponding blob in ngraph. Once all the transformation and optimization processes in `cldnn::program` is finished and the user nodes of those data are known as the GPU operations using the device memory, then the memory is reallocated on the device memory and the constants data are copied to there (i.e., [transferred](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/graph/program.cpp#L457)). Note that constant data are shared within batches and streams.
+
+The typical memory allocation performed in the GPU plugin can be categorized as follows:
+* `Constant memory allocation`: In the GPU plugin, constant data are held by the `data` primitives and the required memory objects are [allocated](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/plugin/ops/constant.cpp#L181) and assigned at the creation of the data primitive. First, it is allocated on the host memory and the constant data are copied from the corresponding blob in ngraph. Once all the transformation and optimization processes in `cldnn::program` are finished and the user nodes of the data are known to be GPU operations using the device memory, the memory is reallocated on the device memory and the constant data is copied there (that is, [transferred](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/graph/program.cpp#L457)). Note that constant data is shared within batches and streams.
+* `Output memory allocation`: A memory object to store the output result of each primitive is created at the creation of each `primitive_inst` ([link](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L263)), except when the output reuses the input memory. Note that the creation of a `primitive_inst` is done in descending order of the output memory size to achieve better memory reuse efficiency.
+
+* `Intermediate memory allocation`: Some primitives, such as _detection_output_ and _non_max_suppression_, consisting of multiple kernels, require intermediate memories to exchange data between those kernels. The allocation of such intermediate memories happens after all allocation for `primitive_insts` is finished ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L592)). This is because it needs to be processed in processing order, using the predecessors' allocation information to decide whether to allocate it on device memory or not by checking the memory allocation restrictions described above.
 
 ## Memory dependency and memory pool
-In GPU plugin, multiple memory objects can be allocated at a same address, when there is no dependency between the users of them. For example, a memory region of a program_node _A_'s output memory can be allocated for another program_node _B_'s output, if the output of _A_ is no longer used by any other program_node, when the result of the _B_ is to be stored. This mechanism is realized by the following two parts;
-1. `Memory dependency` : memory_dependencies of a program_node is set by the memory dependency passes. There are two kinds of memory dependency passes as follows:
-   * `basic_memory_dependencies` : Assuming an in-order-queue execution, this pass adds dependencies to a program_node, which are deduced by checking its direct input and output nodes only.
-   * `oooq_memory_dependencies` : Assuming an out-of-order-queue execution, this pass adds dependencies to all pair of program_nodes that can potentially be executed at the same time.
-2. `Memory pool` : The GPU plugin has a [memory pool](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp) which is responsible for the decision of allocation or reuse for an allocation request. This memory_pool utilizes the memory dependencies set by the above two passes in the decision of reuse of not. Note that each `cldnn::network` has its own `memory_pool`.
+
+In the GPU plugin, multiple memory objects can be allocated at the same address, when there is no dependency between their users. For example, a memory region of a `program_node` _A_'s output memory can be allocated for another `program_node` _B_'s output, if the output of _A_ is no longer used by any other `program_node`, when the result of the _B_ is to be stored. This mechanism is realized by the following two parts:
+1. `Memory dependency` : memory dependencies of a `program_node` are set by the memory dependency passes. There are two kinds of memory dependency passes:
+   * `basic_memory_dependencies` : Assuming an in-order-queue execution, this pass adds dependencies to a `program_node`, which are deduced by checking its direct input and output nodes only.
+   * `oooq_memory_dependencies` : Assuming an out-of-order-queue execution, this pass adds dependencies to all pairs of `program_nodes` that can potentially be executed at the same time.
+2. `Memory pool` : The GPU plugin has a [memory pool](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp), which is responsible for the decision of allocation or reuse for an allocation request. This `memory_pool` utilizes the memory dependencies set by the above two passes when deciding whether to reuse memory or not. Note that each `cldnn::network` has its own `memory_pool`.
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
 * [OpenVINO GPU Plugin](../README.md)
 * [Developer documentation](../../../../docs/dev/index.md)
- 
\ No newline at end of file
diff --git a/src/plugins/intel_gpu/docs/simplified_workflow.md b/src/plugins/intel_gpu/docs/simplified_workflow.md
index 7d72cc3b9bb6f9..c00f829aadbce5 100644
--- a/src/plugins/intel_gpu/docs/simplified_workflow.md
+++ b/src/plugins/intel_gpu/docs/simplified_workflow.md
@@ -1,6 +1,6 @@
-# GPU plugin workflow
+# GPU Plugin Workflow
 
-The simplified workflow in the GPU plugin is shown on the picture below (click on image for higher resolution):
+The simplified workflow in the GPU plugin is shown in the diagram below (click it for higher resolution):
 
```mermaid
classDiagram
@@ -147,6 +147,7 @@ class `intel_gpu::device_query` {Detects available devices for given backend}
```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/docs/source_code_structure.md b/src/plugins/intel_gpu/docs/source_code_structure.md
index 59becd7a212a15..0afc73d737f3bf 100644
--- a/src/plugins/intel_gpu/docs/source_code_structure.md
+++ b/src/plugins/intel_gpu/docs/source_code_structure.md
@@ -1,20 +1,20 @@
-# GPU plugin structure
+# GPU Plugin Structure
 
-Historically GPU plugin was built on top of standalone [clDNN library](https://github.com/intel/clDNN) for DNNs inference on Intel® GPUs,
+Historically, GPU plugin was built on top of standalone [clDNN library](https://github.com/intel/clDNN) for DNNs inference on Intel® GPUs,
 but at some point clDNN became a part of OpenVINO, so now it is a part of the overall GPU plugin code. Intel® Arc™ Graphics Xe-HPG is supported via the embedded [oneDNN library](https://github.com/oneapi-src/oneDNN).
 
 The OpenVINO GPU plugin is responsible for:
 1. [IE Plugin API](https://docs.openvino.ai/latest/openvino_docs_ie_plugin_dg_overview.html) implementation.
- 2. Translation of model from common IE semantic (ov::Function) into plugin specific one (cldnn::topology) which is then compiled into
- gpu graph representation (cldnn::network).
+ 2. Translation of a model from common IE semantic (`ov::Function`) into plugin-specific one (`cldnn::topology`), which is then compiled into
+ GPU graph representation (`cldnn::network`).
 3. Implementation of OpenVINO operation set for Intel® GPU.
- 4. Device specific graph transformations.
+ 4. Device-specific graph transformations.
 5. Memory allocation and management logic.
- 6. Processing of incoming InferRequests using clDNN objects.
+ 6. Processing of incoming InferRequests, using clDNN objects.
 7. Actual execution on GPU device.
-As Intel GPU Plugin source code structure is shown below:
+The Intel GPU Plugin source code structure is shown below:
 src/plugins/intel_gpu                  - root GPU plugin folder
              ├── include               
@@ -49,19 +49,20 @@ src/plugins/intel_gpu                  - root GPU plugin folder
                  └── rapidjson  - thirdparty RapidJSON lib for reading json files (cache.json)
 
-One last thing that is worth mentioning is functional tests which is located in the following location:
+It is worth mentioning the functional tests, which are located in:
```
src/tests/functional/plugin/gpu
```
-Most of the tests are reused across plugins, and each plugin only need to add test instances with some specific parameters.
+Most of the tests are reused across plugins, and each plugin only needs to add the test instances with some specific parameters.
 
-Shared tests are located here:
+Shared tests are located in:
```
src/tests/functional/plugin/shared                        <--- test definitions
src/tests/functional/plugin/gpu/shared_tests_instances   <--- instances for GPU plugin
```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/tests/README.md b/src/tests/README.md
index f751809b8e3bec..3c83354ab9dace 100644
--- a/src/tests/README.md
+++ b/src/tests/README.md
@@ -1,7 +1,7 @@
 # Inference Engine Test Infrastructure
 
 This is the OpenVINO Inference Engine testing framework. The OpenVINO Inference Engine test system contains:
-* **Unit tests** 
+* **Unit tests**
 This test type is used for detailed testing of each software instance (including internal classes with their methods)
 within the tested modules (Inference Engine and Plugins).
 
 There are the following rules, which are **required** for Unit Test development:
@@ -9,50 +9,51 @@ This is OpenVINO Inference Engine testing framework. OpenVINO Inference Engine t
 * The unit test folder for a particular module should replicate the `SRC` folder layout of the corresponding tested module
 to allow further developers to better understand which parts of the software are already covered by unit tests and
 where to add new tests if needed.
 
-    > **Example**: We have `network_serializer.h` and `network_serializer.cpp` files within the `src` folder of the
-    tested Inference Engine module. Then, new `network_serializer_test.cpp` file should be created within the root of
+    > **Example**: There are `network_serializer.h` and `network_serializer.cpp` files within the `src` folder of the
+    tested Inference Engine module. Then, a new `network_serializer_test.cpp` file should be created within the root of
     the Unit Test folder for this module. This test file should cover all the classes and methods from the original files.
-
-    > **Example**: We have `ie_reshaper.cpp` within the `src/shape_infer` subfolder of the tested module. In this case
-    new `shape_infer` subfolder should be created within the the root of the Unit Test folder for this module. And new
+
+    > **Example**: There is the `ie_reshaper.cpp` file within the `src/shape_infer` subfolder of the tested module. In this case,
+    a new `shape_infer` subfolder should be created within the root of the Unit Test folder for this module. And a new
    `ie_reshaper_test.cpp` file should be created within this newly created subfolder. This test file should cover all
    the classes and methods from the original file.
-
-  * Each Unit Test should cover the only target classes and methods. If needed, all external interface components should
+
+  * Each Unit Test should cover only the target classes and methods. If needed, all external interface components should
   be mocked. There are common mock objects provided within the common Unit Test Utilities to stub the general Inference
  Engine API classes.
 
-    > **Example**: We have `cnn_network_impl.hpp` and `cnn_network_impl.cpp` files within the `src` folder of the tested
-    module.
 In this case, new `cnn_network_impl_test.cpp` file should be created and it should contain tests on
+    > **Example**: There are `cnn_network_impl.hpp` and `cnn_network_impl.cpp` files within the `src` folder of the tested
+    module. In this case, a new `cnn_network_impl_test.cpp` file should be created and it should contain tests on
    the `CNNNetworkImpl` class only.
 
-  * It's not prohibited to have several test files for the same file from the tested module.
-  * It's not prohibited to create a separate test file for a specific classes or functions (not for the whole file).
+  * It is not prohibited to have several test files for the same file from the tested module.
+  * It is not prohibited to create a separate test file for specific classes or functions (not for the whole file).
 
-* **Functional tests** 
+* **Functional tests**
 This test type is used to verify the public Inference Engine API. There are the following types of functional tests:
-  * `inference_engine_tests` are plugin-independent tests. Used to verify Inference Engine API methods which don't
-  involve any plugin runtime. E.g. `network_reader`, `network_serializer`, `precision` tests.
-  * `plugin_tests` are plugin-dependent tests. These tests require plugin runtime to be executed during testing. E.g.
-  any tests using `ExecutableNetwork`, `InferRequest` API can only be implemented within this test group.
+  * `inference_engine_tests` are plugin-independent tests. They are used to verify Inference Engine API methods that do not
+  involve any plugin runtime. The examples are: `network_reader`, `network_serializer`, and `precision` tests.
+  * `plugin_tests` are plugin-dependent tests. These tests require plugin runtime to be executed during testing. For example,
+  any tests using `ExecutableNetwork`, `InferRequest` API can only be implemented within this test group.
 
-    > **Example**: Any new test on creating of a CNNNetwork object and checking of its output info should be included to
-    to the Inference Engine Functional tests suite. But any new test containing reading of a network and loading it to a
+    > **Example**: Any new test on creating a CNNNetwork object and checking its output info should be included to
+    the Inference Engine Functional tests suite. However, any new test containing reading of a network and loading it to a
    specified plugin is always the plugin test.
 
 There are the following rules, which are **required** for Functional Test development:
 * All Functional tests are separated into different executables for the Inference Engine and each plugin.
 * Pre-converted IR files must not be used within the new Functional Tests. Tested models should be generated during
 the test execution. The main method to generate a required model is building the required NGraph function and
-    creating of a CNNNetwork using it. If a required layer is not covered by Ngraph it's allowed to build IR file using
-    `xml_net_builder` utility (please refer to the `ir_net.hpp` file). IR XML files hardcoded as strings within the test
+    creating a CNNNetwork using it. If a required layer is not covered by Ngraph, it is allowed to build an IR file using
+    `xml_net_builder` utility (refer to the `ir_net.hpp` file). IR XML files hardcoded as strings within the test
    code should not be used.
 * All the plugin test cases are parameterized with (at least) the device name and included into the common
 `funcSharedTests` static library. This library is linked to the Plugin Test binaries.
 All the plugin developers just add the required test instantiations, based on the linked test definitions, to their own test binary. It should
-  be done to make all the **shared** test cases always visible and available to instantiate by other plugins.
+  be done to make all the **shared** test cases always visible and available to instantiate by other plugins.
+
   > **NOTE**: Any new plugin test case should be added to the common test definitions library
-  (`funcSharedTests`) within the OpenVINO repository first. And then this test case can be instantiated with the
+  (`funcSharedTests`) within the OpenVINO repository first. Then, this test case can be instantiated with the
  required parameters inside one's own plugin test binary, which links this shared tests library.
 
  > **NOTE**: `funcSharedTests` library is added to the developer package and available for closed source
@@ -60,15 +61,17 @@ This is OpenVINO Inference Engine testing framework. OpenVINO Inference Engine t
 * All the inference engine functional test cases are defined and instantiated within a single test binary. These
 test cases are not implemented as a separate library and are not available for instantiation outside this binary.
 
-* **Inference Engine tests utilities** 
+* **Inference Engine tests utilities**
 The set of utilities used by the Inference Engine Functional and Unit tests. Different helper functions,
-  blob comparators, OS specific constants, etc are implemented within the utilities.
+  blob comparators, OS-specific constants, etc. are implemented within the utilities.
 Internal namespaces (for example, `CommonTestUtils::`, `FuncTestUtils::` or `UnitTestUtils::`) must be used to
 separate utilities by domains.
+
 > **NOTE**: All the utilities libraries are added to the developer package and available for closed source
 development.
 
- ## See also
+## See also
+
 * [OpenVINO™ README](../../README.md)
 * [OpenVINO Core Components](../README.md)
 * [Developer documentation](../../docs/dev/index.md)
diff --git a/src/tests/functional/plugin/conformance/test_runner/README.md b/src/tests/functional/plugin/conformance/test_runner/README.md
index 4844f4e9b14d0e..4c67da79667285 100644
--- a/src/tests/functional/plugin/conformance/test_runner/README.md
+++ b/src/tests/functional/plugin/conformance/test_runner/README.md
@@ -1,9 +1,11 @@
-# Conformance test runner
+# Conformance Test Runner
 
 ## Description
+
 Conformance suites certify plugin functionality using a set of tests with parameters independent of plugin specificity. There are two types of conformance validation.
 
 ### API Conformance
+
 The suite checks the following OpenVINO API entities in a plugin implementation:
 * plugin
 * compiled model (executable network)
@@ -11,24 +13,24 @@ The suite checks the following OpenVINO API entities in a plugin implementation:
 * infer request
 Also, there are test instantiations to validate hardware plugin functionality via software plugins (for example, MULTI, HETERO, etc.) for the entities.
 
 The other part of the API conformance suite is QueryModel validation:
-* `ReadIR_queryModel` tests validate the `query_model` API using a simple single operation graph (Conformance IR) based on model parameters.
+* `ReadIR_queryModel` tests validate the `query_model` API, using a simple single operation graph (Conformance IR) based on model parameters.
 * `OpImplCheck` tests are simple synthetic checks of `query_model`; they set the implementation status for each operation.
 
-A result of the `apiConformanceTests` run is two xml files: `report_api.xml` and `report_opset.xml`.
 The first one shows OpenVINO API entities' test statistics for each OpenVINO API entity, such as passed/failed/crashed/skipped/hanging, tests number, pass rates, and implementation status. The second one demonstrates the `query_model` results for each operation.
-
-
+A result of the `apiConformanceTests` run is two *xml* files: `report_api.xml` and `report_opset.xml`. The first one shows OpenVINO API entities' test statistics for each OpenVINO API entity, such as `passed/failed/crashed/skipped/hanging`, the number of tests, pass rates, and implementation status. The second one demonstrates the `query_model` results for each operation.
 
 ### Opset Conformance
+
 The suite validates an OpenVINO operation plugin implementation, using simple single operation graphs (Conformance IR) taken from models. The plugin inference output is compared with the reference.
 
-  The suite contains:
+The suite contains:
 * `ReadIR_compareWithRefs` set allows reading IRs from folders recursively, inferring them, and comparing plugin results with the reference.
-* `OpImplCheckTest` set checks an operation plugin implementation status, using a simple synthetic single operation graph (`Implemented`/`Not implemented`). The suite checks only `compile_model` without comparison with the reference.
+* `OpImplCheckTest` set checks an operation plugin implementation status, using a simple synthetic single operation graph (`Implemented`/`Not implemented`). The suite checks only `compile_model` without comparison with the reference.
 
 A result of the `conformanceTests` run is the `report_opset.xml` file. It shows test statistics, such as pass rate, passed, crashed, skipped, and failed tests, and plugin implementation per operation for devices.
 
 ## How to build
+
 Run the following commands in the build directory:
 1. Generate CMake project:
```
cmake -DENABLE_TESTS=ON -DENABLE_FUNCTIONAL_TESTS=ON ..
```
@@ -43,129 +45,128 @@ Run the following command in build directory:
```
make --jobs=$(nproc --all) lib_plugin_name
```
-
+
 ## How to run using [simple conformance runner](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/run_conformance.py)
+
 There is a simple python runner to complete the whole conformance pipeline locally. Some steps can be excluded from the pipeline via command-line parameters.
 
 ### The conformance pipeline steps:
+
-1. (Optional) Download models/conformance IR via URL / copy archieve to working directory / verify dirs / check list-files.
+1. (Optional) Download models/conformance IR via URL / copy archive to working directory / verify dirs / check list-files.
 2. (Optional) Run `SubgraphDumper` to generate a simple single op graph based on models, or download the `conformance_ir` folder (if `-s=1`).
 3. Run conformance test executable files.
 4. Generate conformance reports.
 
 ### Command-line arguments
+
 The script has the following arguments:
 * `-h, --help` show this help message and exit
 * `-m MODELS_PATH, --models_path MODELS_PATH`
-                        Path to the directory/ies containing models to dump subgraph (the default way is to download conformance IR). It may be directory, archieve file, .lst file or http link to download something . If `--s=0`, specify the Conformance IRs directoryy
+                        Path to the directory/ies containing models to dump subgraph (the default is to download conformance IR). It may be a directory, an archive file, an `.lst` file, or a URL to download some data. If `--s=0`, specify the Conformance IRs directory.
 * `-d DEVICE, --device DEVICE`
-                        Specify the target device. The default value is CPU
+                        Specify the target device. The default value is `CPU`.
 * `-ov OV_PATH, --ov_path OV_PATH`
-                        OV repo path. The default way is try to find the absolute path of OV repo (by using script path)
+                        OV repo path. By default, the script tries to find the absolute path of the OV repo (by using the script path).
 * `-w WORKING_DIR, --working_dir WORKING_DIR`
-                        Specify a working directory to save all artifacts, such as reports, models, conformance_irs, etc.
+                        Specify a working directory to save all artifacts, such as reports, models, `conformance_irs`, etc.
 * `-t TYPE, --type TYPE`
-                        Specify conformance type: `OP` or `API`. The default value is `OP`
+                        Specify conformance type: `OP` or `API`. The default value is `OP`.
 * `-s DUMP_CONFORMANCE, --dump_conformance DUMP_CONFORMANCE`
-                        Set '1' if you want to create Conformance IRs from custom/downloaded models. In other cases, set `0`. The default value is '1'
+                        Set `1` if you want to create Conformance IRs from custom/downloaded models. In other cases, set `0`. The default value is `1`.
 * `-j WORKERS, --workers WORKERS`
-                        Specify number of workers to run in parallel. The default value is CPU count - 1
+                        Specify the number of workers to run in parallel. The default value is `CPU count - 1`.
 * `--gtest_filter GTEST_FILTER`
-                        Specify gtest filter to apply when running test. E.g. *Add*:*BinaryConv*. The default value is None
+                        Specify a gtest filter to apply when running a test. For example, *Add*:*BinaryConv*. The default value is `None`.
 * `-c OV_CONFIG_PATH, --ov_config_path OV_CONFIG_PATH`
-                        Specify path to file contains plugin config
+                        Specify the path to a file that contains the plugin config.
 * `-sh SHAPE_MODE, --shape_mode SHAPE_MODE`
-                        Specify shape mode for conformance. Default value is ``. Possible values: `static`, `dynamic`, ``
+                        Specify the shape mode for conformance. The default value is ``. Possible values: `static`, `dynamic`, ``
 
-> **NOTE**:
-> All arguments are optional and have default values to reproduce OMZ conformance results in a default way.
+> **NOTE**: All arguments are optional and have default values to reproduce OMZ conformance results in the default way.
 
-> **NOTE**:
-> The approach can be used as custom model scope validator!
+> **NOTE**: The approach can be used as a custom model scope validator.
 
 ## Examples of usage:
+
-1. Use the default way to reproduce opset conformance results for OMZ on GPU:
+1. Use the default settings to reproduce opset conformance results for OMZ on GPU:
```
python3 run_conformance.py -d GPU
```
 2. Use the conformance pipeline to check new models support (as IRs) on the CPU plugin and save results to a custom directory:
```
python3 run_conformance.py -m /path/to/new/model_irs -s=1 -w /path/to/working/dir -d CPU
```
-3. Use custom OV build to check GNA conformance using pre-generated conformance_irs:
+3. Use a custom OV build to check GNA conformance, using pre-generated `conformance_irs`:
```
python3 run_conformance.py -m /path/to/conformance_irs -s=0 -ov /path/to/ov_repo_on_custom_branch -d GNA
```
-
-> **IMPORTANT NOTE:**
-> If you need to debug some conformance tests, use the binary run as the default method. If you want to get conformance results or reproduce CI behavior, use the simple python runner.
+> **IMPORTANT NOTE:** If you need to debug some conformance tests, use the binary run as the default method. If you want to get conformance results or reproduce CI behavior, use the simple python runner.
## How to generate Conformance IRs set + Run the following commands: 1. Clone [`Open Model Zoo repo`](https://github.com/openvinotoolkit/open_model_zoo) or prepare custom model scope 2. Download all models using [Downloader tool](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/downloader.py) from the repo. -3. Convert downloaded models to IR files using [Converter tool](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/converter.py) from the repo. +3. Convert downloaded models to IR files, using [Converter tool](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/converter.py) from the repo. 4. Run [Subgraph dumper](./../subgraphs_dumper/README.md) to collect unique operation set from the models. - - ## How to run operation conformance suite + The target is able to take the following command-line arguments: * `-h` prints target command-line options with description. * `--device` specifies target device. -* `--input_folders` specifies the input folders with IRs or '.lst' file contains IRs path. Delimiter is `,` symbol. -* `--plugin_lib_name` is name of plugin library. The example is `openvino_intel_cpu_plugin`. Use only with unregistered in IE Core devices. -* `--disable_test_config` allows to ignore all skipped tests with the exception of `DISABLED_` prefix using. -* `--skip_config_path` allows to specify paths to files contain regular expressions list to skip tests. [Examples](./op_conformance_runner/skip_configs) -* `--config_path` allows to specify path to file contains plugin config. [Example](./op_conformance_runner/config/config_example.txt) -* `--extend_report` allows not to re-write device results to the report (add results of this run to the existing). Mutually exclusive with --report_unique_name. -* `--report_unique_name` allows to save report with unique name (report_pid_timestamp.xml). Mutually exclusive with --extend_report. -* `--save_report_timeout` allows to try to save report in cycle using timeout (in seconds). -* `--output_folder` Paths to the output folder to save report. -* `--extract_body` allows to count extracted operation bodies to report. -* `--shape_mode` Optional. Allows to run `static`, `dynamic` or both scenarios. Default value is empty string allows to run both scenarios. Possible values +* `--input_folders` specifies the input folders with IRs or an `.lst` file that contains paths separated by the `,` symbol. +* `--plugin_lib_name` is the name of a plugin library, for example, `openvino_intel_cpu_plugin`. Use it only with devices that are not registered in IE Core. +* `--disable_test_config` allows ignoring all skipped tests, except those with the `DISABLED_` prefix. +* `--skip_config_path` allows specifying paths to files that contain lists of regular expressions for tests to skip. [Examples](./op_conformance_runner/skip_configs/skip_config_example.lst) +* `--config_path` allows specifying the path to a file that contains plugin config. [Example](./op_conformance_runner/config/config_example.txt) +* `--extend_report` allows you not to re-write device results to the report (add results of this run to the existing one). Mutually exclusive with `--report_unique_name`. +* `--report_unique_name` allows you to save a report with a unique name (`report_pid_timestamp.xml`). Mutually exclusive with `--extend_report`. +* `--save_report_timeout` allows retrying to save a report in a cycle, using a timeout (in seconds). +* `--output_folder` specifies the path to the output folder to save a report.
+* `--extract_body` allows you to count extracted operation bodies in a report. +* `--shape_mode` is optional. It allows you to run `static`, `dynamic`, or both scenarios. The default value is an empty string, which allows running both scenarios. Possible values are `static`, `dynamic`, `` -* `--test_timeout` Setup timeout for each test in seconds, default timeout 900seconds (15 minutes). +* `--test_timeout` specifies the timeout for each test in seconds. The default timeout is 900 seconds (15 minutes). * All `gtest` command-line parameters > **NOTE**: > -> Using of [`parallel_runner`](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py) tool to run a conformance suite helps to report crashed tests and collect correct statistic after unexpected crashes. -> The tool is able to work in 2 modes: -> * one test is run in separate thread (first run, as the output the cache will be saved as a custom file) -> * similar load time per one worker based on test execution time. May contain different test count per worker -> +> +> Using the [`parallel_runner`](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py) tool to run a conformance suite helps to report crashed tests and collect correct statistics after unexpected crashes. +> The tool is able to work in two modes: +> * one test is run in a separate thread (on the first run; the cache will be saved to a custom file as the output). +> * similar load time per worker, based on test execution time. Workers may run different numbers of tests. +> > The example of usage is: > ``` -> python3 run_parallel.py -e=/path/to/openvino/bin/intel64/Debug/conformanceTests -d . -> --gtest_filter=*Add*:*BinaryConv* -- --input_folders=/path/to/ir_1,/path/to/ir_2 --device=CPU +> python3 run_parallel.py -e=/path/to/openvino/bin/intel64/Debug/conformanceTests -d . +> --gtest_filter=*Add*:*BinaryConv* -- --input_folders=/path/to/ir_1,/path/to/ir_2 --device=CPU > --report_unique_name --output_folder=/path/to/temp_output_report_folder > ``` > All arguments after the `--` symbol are forwarded to the `conformanceTests` target. -> +> > If you use the `--report_unique_name` argument, run -> [the merge xml script](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py) -> to aggregate the results to one xml file. Check command-line arguments with `--help` before running the command. +> [the merge xml script](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py) +> to aggregate the results to one *xml* file. Check command-line arguments with `--help` before running the command. > The example of usage is: > ``` > python3 merge_xmls.py --input_folders=/path/to/temp_output_report_folder --output_folder=/path/to/output_report_folder --output_filename=report_aggregated > ``` ## How to create operation conformance report + Run [the summarize script](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py) to generate `html` and `csv` reports. Check command-line arguments with `--help` before running the command. The example of using the script is: ``` python3 summarize.py --xml /opt/repo/infrastructure-master/thirdparty/gtest-parallel/report.xml --out /opt/repo/infrastructure-master/thirdparty/gtest-parallel/ ``` -> **NOTE**: -> -> Please, do not forget to copy [styles folder](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/template) to the output directory.
It -> helps to provide report with the filters and other usable features. +> **NOTE**: Remember to copy [styles folder](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/template) to the output directory. It helps to provide a report with filters and other useful features. The report contains statistics based on conformance results and filter fields at the top of the page. -## See also +## See Also + * [OpenVINO™ README](../../../../../../README.md) * [OpenVINO Core Components](../../../../../README.md) * [Developer documentation](../../../../../../docs/dev/index.md) \ No newline at end of file From 232c802e074ab7c67ae36b02a878e3714d779042 Mon Sep 17 00:00:00 2001 From: River Li Date: Wed, 22 Mar 2023 16:18:40 +0800 Subject: [PATCH 031/296] [CAPI] Add ov::hint::execution_mode property (#16466) --- .../c/include/openvino/c/ov_property.h | 14 ++++++ src/bindings/c/src/ov_property.cpp | 1 + src/bindings/c/tests/ov_core_test.cpp | 45 +++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index b00f72aaedafc2..54c887435c5cd6 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -171,3 +171,17 @@ ov_property_key_enable_profiling; */ OPENVINO_C_VAR(const char*) ov_property_key_device_priorities; + +/** + * @brief Read-write property for the high-level OpenVINO execution hint. + * Unlike low-level properties that are individual (per-device), the hints are something that every device accepts + * and turns into device-specific settings. + * The execution mode hint controls preferred optimization targets (performance or accuracy) for a given model. + * It can be set to one of the values below: + * "UNDEFINED" //!< Undefined value, settings may vary from device to device + * "PERFORMANCE", //!< Optimize for max performance + * "ACCURACY", //!< Optimize for max accuracy + * @ingroup ov_property_c_api + */ OPENVINO_C_VAR(const char*) +ov_property_key_hint_execution_mode; diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index 613d52b376a228..2d6c470ae5df1d 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -29,3 +29,4 @@ const char* ov_property_key_hint_model_priority = "MODEL_PRIORITY"; const char* ov_property_key_log_level = "LOG_LEVEL"; const char* ov_property_key_enable_profiling = "PERF_COUNT"; const char* ov_property_key_device_priorities = "MULTI_DEVICE_PRIORITIES"; +const char* ov_property_key_hint_execution_mode = "EXECUTION_MODE_HINT"; diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 6804504c94053f..0cb2f29f65e878 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -147,6 +147,27 @@ TEST_P(ov_core_test, ov_core_compile_model_with_property) { ov_core_free(core); } +TEST_P(ov_core_test, ov_core_compile_model_with_execution_mode) { + std::string device_name = "AUTO"; + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + ov_model_t* model = nullptr; + OV_EXPECT_OK(ov_core_read_model(core, xml_file_name.c_str(), nullptr, &model)); + EXPECT_NE(nullptr, model); + + ov_compiled_model_t* compiled_model = nullptr; + const char* key = ov_property_key_hint_execution_mode; + const char* value = "PERFORMANCE"; + OV_EXPECT_OK(ov_core_compile_model(core, model, device_name.c_str(), 2, &compiled_model, key, value)); +
EXPECT_NE(nullptr, compiled_model); + + ov_compiled_model_free(compiled_model); + ov_model_free(model); + ov_core_free(core); +} + TEST_P(ov_core_test, ov_core_compile_model_with_property_invalid) { auto device_name = GetParam(); ov_core_t* core = nullptr; @@ -306,6 +327,30 @@ TEST_P(ov_core_test, ov_core_get_property) { ov_core_free(core); } +TEST_P(ov_core_test, ov_core_set_and_get_property_execution_mode) { + std::string device_name = "AUTO"; + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + const char* key = ov_property_key_hint_execution_mode; + char* property_value = nullptr; + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + ov_free(property_value); + + const char* value1 = "ACCURACY"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key, value1)); + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + EXPECT_STREQ(value1, property_value); + + const char* value2 = "PERFORMANCE"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key, value2)); + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + EXPECT_STREQ(value2, property_value); + + ov_core_free(core); +} + TEST_P(ov_core_test, ov_core_set_get_property_str) { #ifdef __aarch64__ GTEST_SKIP() << "Skip this test for ARM CPU for now, cause no string property supported"; From 14e70e76fbe32311beca58a349c082d1d94fefb2 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:39:32 +0100 Subject: [PATCH 032/296] DOCS shift to rst - Further Low-Level Implementation Details (#16444) --- docs/{img => _static/images}/batch_device.svg | 0 .../cpu_execution_conventional_approach.svg | 0 .../images}/cpu_execution_streams.svg | 0 .../images}/cpu_execution_streams_2.svg | 0 .../images}/large_batch_approach.svg | 0 .../dldt_deployment_optimization_internals.md | 186 +++++------------- 6 files changed, 44 insertions(+), 142 deletions(-) rename docs/{img => _static/images}/batch_device.svg (100%) rename docs/{img => _static/images}/cpu_execution_conventional_approach.svg (100%) rename docs/{img => _static/images}/cpu_execution_streams.svg (100%) rename docs/{img => _static/images}/cpu_execution_streams_2.svg (100%) rename docs/{img => _static/images}/large_batch_approach.svg (100%) diff --git a/docs/img/batch_device.svg b/docs/_static/images/batch_device.svg similarity index 100% rename from docs/img/batch_device.svg rename to docs/_static/images/batch_device.svg diff --git a/docs/img/cpu_execution_conventional_approach.svg b/docs/_static/images/cpu_execution_conventional_approach.svg similarity index 100% rename from docs/img/cpu_execution_conventional_approach.svg rename to docs/_static/images/cpu_execution_conventional_approach.svg diff --git a/docs/img/cpu_execution_streams.svg b/docs/_static/images/cpu_execution_streams.svg similarity index 100% rename from docs/img/cpu_execution_streams.svg rename to docs/_static/images/cpu_execution_streams.svg diff --git a/docs/img/cpu_execution_streams_2.svg b/docs/_static/images/cpu_execution_streams_2.svg similarity index 100% rename from docs/img/cpu_execution_streams_2.svg rename to docs/_static/images/cpu_execution_streams_2.svg diff --git a/docs/img/large_batch_approach.svg b/docs/_static/images/large_batch_approach.svg similarity index 100% rename from docs/img/large_batch_approach.svg rename to docs/_static/images/large_batch_approach.svg diff --git 
a/docs/optimization_guide/dldt_deployment_optimization_internals.md b/docs/optimization_guide/dldt_deployment_optimization_internals.md index b03742d351f180..ab596e49c98e0b 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_internals.md +++ b/docs/optimization_guide/dldt_deployment_optimization_internals.md @@ -1,168 +1,70 @@ # Further Low-Level Implementation Details {#openvino_docs_deployment_optimization_guide_internals} -## Throughput on the CPU: Internals -As explained in the [throughput-related section](./dldt_deployment_optimization_tput.md), the OpenVINO streams are means of running multiple requests in parallel. -In order to best serve multiple inference requests executed simultaneously, the inference threads are grouped/pinned to the particular CPU cores, constituting the "CPU" streams. -This provides much better performance for the networks than batching, especially for the multiple-core systems: - -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - **Conventional Approach** - - | Every CNN op is internally parallelized over a full number of CPU cores and it is detrimental for non-scalable ops. - | A lot of synchronization between many threads results in overhead. - | An only option to improve efficiency is batching. - - .. container:: column-two-col-content - - **Streams** - - | CPU cores are evenly distributed between execution streams (each 1-4 threads). - | Less threads per stream means less synchronization, better locality, and finer granularity. - -@endsphinxdirective - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -![](../img/cpu_execution_conventional_approach.svg) @sphinxdirective -.. raw:: html +Throughput on the CPU: Internals +################################ -
-
- -@endsphinxdirective - - -![](../img/cpu_execution_streams.svg) - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - .. raw:: html +As explained in the :doc:`throughput-related section `, the OpenVINO streams are means of running multiple requests in parallel. +In order to best serve multiple inference requests executed simultaneously, the inference threads are grouped/pinned to the particular CPU cores, constituting the "CPU" streams. +This provides much better performance for the networks than batching, especially for the multiple-core systems: -
+.. list-table:: + :header-rows: 1 - .. container:: column-two-col-content + * - Conventional Approach + - Streams + * - | Every CNN op is internally parallelized over a full number of CPU cores and it is detrimental for non-scalable ops. + | A lot of synchronization between many threads results in overhead. + | An only option to improve efficiency is batching. + - | CPU cores are evenly distributed between execution streams (each 1-4 threads). + | Less threads per stream means less synchronization, better locality, and finer granularity. + * - |conventional-approach| + - | |execution-streams| + | Requests are executed in parallel with a small number of threads. + | Layer-wise, the streams imply much less synchronization. - | Requests are executed in parallel with a small number of threads. - | **Layer-wise, the streams imply much less synchronization.** - -@endsphinxdirective +.. |conventional-approach| image:: _static/images/cpu_execution_conventional_approach.svg +.. |execution-streams| image:: _static/images/cpu_execution_streams.svg Compared to the batching, the parallelism is somewhat transposed (performed over inputs with much less synchronization within CNN ops): -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - **Large Batch Approach** - - | All threads process all inputs at once. - | Assumes all layers are parallelized well. - | "Fat" requests are executed one by one. - - .. container:: column-two-col-content - - **Streams** - - | CPU cores are evenly distributed between execution streams. - | "Parallelize the outermost loop" rule of thumb. - | Individual requests are executed in parallel. - -@endsphinxdirective - - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -![](../img/large_batch_approach.svg) - -@sphinxdirective - -.. raw:: html +.. list-table:: + :header-rows: 1 -
-
+ * - Large Batch Approach + - Streams + * - | All threads process all inputs at once. + | Assumes all layers are parallelized well. + | “Fat” requests are executed one by one. + - | CPU cores are evenly distributed between execution streams. + | “Parallelize the outermost loop” rule of thumb. + | Individual requests are executed in parallel. + * - |large-batch-approach| + - | |execution-streams-2| + | Inputs-wise the streams are the “transposed” batch. -@endsphinxdirective - - -![](../img/cpu_execution_streams_2.svg) - -@sphinxdirective - -.. raw:: html +.. |large-batch-approach| image:: _static/images/large_batch_approach.svg -
-
- -@endsphinxdirective - - -@sphinxdirective +.. |execution-streams-2| image:: _static/images/cpu_execution_streams_2.svg -.. container:: row-two-col-content - .. container:: column-two-col-content +Keep in mind that :doc:`high-level performance hints ` allow the implementation to select the optimal number of streams depending on model's compute demands and CPU capabilities, including :doc:`int8 inference ` hardware acceleration, number of cores, etc. - .. raw:: html +Automatic Batching Internals +############################ -
- - .. container:: column-two-col-content - - **Inputs-wise the streams are the “transposed” batch.** - -@endsphinxdirective - - -Keep in mind that [high-level performance hints](../OV_Runtime_UG/performance_hints.md) allow the implementation to select the optimal number of streams depending on model's compute demands and CPU capabilities, including [int8 inference](@ref openvino_docs_model_optimization_guide) hardware acceleration, number of cores, etc. - -## Automatic Batching Internals -[Automatic batching](../OV_Runtime_UG/automatic_batching.md) performs on-the-fly grouping of inference requests to improve device utilization. +:doc:`Automatic batching ` performs on-the-fly grouping of inference requests to improve device utilization. It relaxes the requirement for an application to saturate devices such as GPU by using a large batch "explicitly". It performs transparent input gathering from individual inference requests followed by the actual batched execution, with no programming effort from the user: -![](../img/batch_device.svg) + +.. image:: _static/images/batch_device.svg Essentially, Automatic Batching shifts asynchronicity from individual requests to groups of requests that constitute the batches. Furthermore, for the execution to be efficient, it is very important that the requests arrive timely, without causing a batching timeout. Normally, the timeout should never be hit. It is rather a graceful way to handle the application exit (when the inputs are not arriving anymore, so the full batch is not possible to collect). If a workload experiences timeouts, which lead to a drop in performance due to increased latency of every request, consider balancing its value against the batch size. For example, a smaller batch size and timeout value may yield better results than a large batch size coupled with a timeout value that cannot guarantee accommodating all the required requests. -Finally, following the `get_tensor` idiom section from the [general optimizations](./dldt_deployment_optimization_common.md) helps Automatic Batching to save on inputs/outputs copies. According to that, you should always prefer the "get" versions of the tensors' data access APIs in your applications. +Finally, following the ``get_tensor`` idiom section from the :doc:`general optimizations ` helps Automatic Batching to save on inputs/outputs copies. According to that, you should always prefer the "get" versions of the tensors' data access APIs in your applications. 
+ +@endsphinxdirective From 2f69305aa3736610c8cb0e7d7724a182b6403a57 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:41:59 +0100 Subject: [PATCH 033/296] DOCS shift to rst (#16445) --- .../{img => _static/images}/nncf_workflow.svg | 0 docs/optimization_guide/nncf/introduction.md | 101 +++++++++++------- 2 files changed, 65 insertions(+), 36 deletions(-) rename docs/{img => _static/images}/nncf_workflow.svg (100%) diff --git a/docs/img/nncf_workflow.svg b/docs/_static/images/nncf_workflow.svg similarity index 100% rename from docs/img/nncf_workflow.svg rename to docs/_static/images/nncf_workflow.svg diff --git a/docs/optimization_guide/nncf/introduction.md b/docs/optimization_guide/nncf/introduction.md index ba2a2662ba3a17..a4fcbbead198b4 100644 --- a/docs/optimization_guide/nncf/introduction.md +++ b/docs/optimization_guide/nncf/introduction.md @@ -9,10 +9,11 @@ qat_introduction filter_pruning -@endsphinxdirective -## Introduction -Training-time model compression improves model performance by applying optimizations (such as quantization) during the training. The training process minimizes the loss associated with the lower-precision optimizations, so it is able to maintain the model’s accuracy while reducing its latency and memory footprint. Generally, training-time model optimization results in better model performance and accuracy than [post-training optimization](@ref pot_introduction), but it can require more effort to set up. +Introduction +#################### + +Training-time model compression improves model performance by applying optimizations (such as quantization) during the training. The training process minimizes the loss associated with the lower-precision optimizations, so it is able to maintain the model’s accuracy while reducing its latency and memory footprint. Generally, training-time model optimization results in better model performance and accuracy than :doc:`post-training optimization `, but it can require more effort to set up. OpenVINO provides the Neural Network Compression Framework (NNCF) tool for implementing compression algorithms on models to improve their performance. NNCF is a Python library that integrates into PyTorch and TensorFlow training pipelines to add training-time compression methods to the pipeline. To apply training-time compression methods with NNCF, you need: @@ -22,65 +23,93 @@ OpenVINO provides the Neural Network Compression Framework (NNCF) tool for imple Adding compression to a training pipeline only requires a few lines of code. The compression techniques are defined through a single configuration file that specifies which algorithms to use during fine-tuning. -### NNCF Quick Start Examples +NNCF Quick Start Examples ++++++++++++++++++++++++++ + See the following Jupyter Notebooks for step-by-step examples showing how to add model compression to a PyTorch or Tensorflow training pipeline with NNCF: -- [Quantization Aware Training with NNCF and PyTorch](https://docs.openvino.ai/latest/notebooks/302-pytorch-quantization-aware-training-with-output.html). -- [Quantization Aware Training with NNCF and TensorFlow](https://docs.openvino.ai/latest/notebooks/305-tensorflow-quantization-aware-training-with-output.html). +- `Quantization Aware Training with NNCF and PyTorch `__. +- `Quantization Aware Training with NNCF and TensorFlow `__. + +Installation +#################### + +NNCF is open-sourced on `GitHub `__ and distributed as a separate package from OpenVINO. It is also available on PyPI. 
Install it to the same Python environment where PyTorch or TensorFlow is installed. -## Installation -NNCF is open-sourced on [GitHub](https://github.com/openvinotoolkit/nncf) and distributed as a separate package from OpenVINO. It is also available on PyPI. Install it to the same Python environment where PyTorch or TensorFlow is installed. +Install from PyPI +++++++++++++++++++++ -### Install from PyPI To install the latest released version via pip manager run the following command: -``` -pip install nncf -``` -> **NOTE**: To install with specific frameworks, use the `pip install nncf[extras]` command, where extras is a list of possible extras, for example, `torch`, `tf`, `onnx`. +.. code-block:: sh + + pip install nncf + + +.. note:: + + To install with specific frameworks, use the `pip install nncf[extras]` command, where extras is a list of possible extras, for example, `torch`, `tf`, `onnx`. + -To install the latest NNCF version from source follow the instruction on [GitHub](https://github.com/openvinotoolkit/nncf#installation). +To install the latest NNCF version from source follow the instruction on `GitHub `__. -> **NOTE**: NNCF does not have OpenVINO as an installation requirement. To deploy optimized models you should install OpenVINO separately. +.. note:: + + NNCF does not have OpenVINO as an installation requirement. To deploy optimized models you should install OpenVINO separately. + +Working with NNCF +#################### -## Working with NNCF The figure below shows a common workflow of applying training-time compressions with NNCF. The NNCF optimizations are added to the TensorFlow or PyTorch training script, and then the model undergoes fine-tuning. The optimized model can then be exported to OpenVINO IR format for accelerated performance with OpenVINO Runtime. -![](../../img/nncf_workflow.svg) +.. image:: _static/images/nncf_workflow.svg + +Training-Time Compression Methods ++++++++++++++++++++++++++++++++++ -### Training-Time Compression Methods -NNCF provides several methods for improving model performance with training-time compression. +NNCF provides several methods for improving model performance with training-time compression. -#### Quantization -Quantization is the process of converting the weights and activation values in a neural network from a high-precision format (such as 32-bit floating point) to a lower-precision format (such as 8-bit integer). It helps to reduce the model’s memory footprint and latency. NNCF uses quantization-aware training to quantize models. +Quantization +-------------------- +Quantization is the process of converting the weights and activation values in a neural network from a high-precision format (such as 32-bit floating point) to a lower-precision format (such as 8-bit integer). It helps to reduce the model’s memory footprint and latency. NNCF uses quantization-aware training to quantize models. Quantization-aware training inserts nodes into the neural network during training that simulate the effect of lower precision. This allows the training algorithm to consider quantization errors as part of the overall training loss that gets minimized during training. The network is then able to achieve enhanced accuracy when quantized. -The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. See the [Quantization-ware Training guide](@ref qat_introduction) to learn more. 
+The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. See the :doc:`Quantization-aware Training guide ` to learn more. + +Filter pruning +-------------------- + +Filter pruning algorithms compress models by zeroing out the output filters of convolutional layers based on a certain filter importance criterion. During fine-tuning, an importance criterion is used to search for redundant filters that don’t significantly contribute to the network’s output and zero them out. After fine-tuning, the zeroed-out filters are removed from the network. For more information, see the :doc:`Filter Pruning ` page. -#### Filter pruning -Filter pruning algorithms compress models by zeroing out the output filters of convolutional layers based on a certain filter importance criterion. During fine-tuning, an importance criteria is used to search for redundant filters that don’t significantly contribute to the network’s output and zero them out. After fine-tuning, the zeroed-out filters are removed from the network. For more information, see the [Filter Pruning](@ref filter_pruning) page. +Experimental methods +-------------------- -#### Experimental methods NNCF also provides state-of-the-art compression techniques that are still in experimental stages of development and are only recommended for expert developers. These include: - Mixed-precision quantization - Sparsity - Binarization -To learn more about these methods, visit the [NNCF repository on GitHub](https://github.com/openvinotoolkit/nncf). +To learn more about these methods, visit the `NNCF repository on GitHub `__. + +Recommended Workflow +++++++++++++++++++++ -### Recommended Workflow Using compression-aware training requires a training pipeline, an annotated dataset, and compute resources (such as CPUs or GPUs). If you don't already have these set up and available, it can be easier to start post-training quantization to quickly see quantized results. Then you can use compression-aware training if the model isn't accurate enough. We recommend the following workflow for compressing models with NNCF: -1. [Perform post-training quantization](@ref pot_introduction) on your model and then compare performance to the original model. -2. If the accuracy is too degraded, use [Quantization-aware Training](@ref qat_introduction) to increase accuracy while still achieving faster inference time. -3. If the quantized model is still too slow, use [Filter Pruning](@ref filter_pruning) to further improve the model’s inference speed. +1. :doc:`Perform post-training quantization ` on your model and then compare performance to the original model. +2. If the accuracy is too degraded, use :doc:`Quantization-aware Training ` to increase accuracy while still achieving faster inference time. +3. If the quantized model is still too slow, use :doc:`Filter Pruning ` to further improve the model’s inference speed.
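As a concrete illustration of the workflow above, the sketch below wires NNCF quantization-aware training into a toy PyTorch pipeline. It is a minimal sketch only: the toy model, dataset, and config values are illustrative assumptions, not part of this documentation, and a real pipeline would fine-tune the wrapped model before exporting it.

.. code-block:: python

   import torch
   from torch.utils.data import DataLoader, TensorDataset
   from nncf import NNCFConfig
   from nncf.torch import create_compressed_model, register_default_init_args

   # A toy model and dataset so the sketch is self-contained.
   model = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ReLU(), torch.nn.Linear(4, 2))
   dataset = TensorDataset(torch.randn(32, 8), torch.randint(0, 2, (32,)))
   train_loader = DataLoader(dataset, batch_size=8)

   # The config declares which compression algorithms to apply during training.
   nncf_config = NNCFConfig.from_dict({
       "input_info": {"sample_size": [1, 8]},
       "compression": {"algorithm": "quantization"},
   })
   nncf_config = register_default_init_args(nncf_config, train_loader)  # data for quantizer initialization

   # Wrapping the model inserts fake-quantize operations; fine-tune compressed_model as usual afterwards.
   compression_ctrl, compressed_model = create_compressed_model(model, nncf_config)

   # After fine-tuning, export to ONNX for conversion to OpenVINO IR.
   compression_ctrl.export_model("compressed_model.onnx")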
+ +Additional Resources +#################### -## Additional Resources -- [Quantizing Models Post-training](@ref pot_introduction) -- [NNCF GitHub repository](https://github.com/openvinotoolkit/nncf) -- [NNCF FAQ](https://github.com/openvinotoolkit/nncf/blob/develop/docs/FAQ.md) -- [Quantization Aware Training with NNCF and PyTorch](https://docs.openvino.ai/latest/notebooks/302-pytorch-quantization-aware-training-with-output.html) -- [Quantization Aware Training with NNCF and TensorFlow](https://docs.openvino.ai/latest/notebooks/305-tensorflow-quantization-aware-training-with-output.html) \ No newline at end of file +- :doc:`Quantizing Models Post-training ` +- `NNCF GitHub repository `__ +- `NNCF FAQ `__ +- `Quantization Aware Training with NNCF and PyTorch `__ +- `Quantization Aware Training with NNCF and TensorFlow `__ + +@endsphinxdirective From 066ef694f5ee5fc04c276f7f99e406ed33545ac6 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:42:47 +0100 Subject: [PATCH 034/296] DOCS shift to rst - Deploying Your Application with Deployment Manager (#16453) --- .../deployment/deployment-manager-tool.md | 224 ++++++++---------- .../images}/configuration_dialog.png | 0 .../images}/selection_dialog.png | 0 3 files changed, 103 insertions(+), 121 deletions(-) rename docs/{OV_Runtime_UG/img => _static/images}/configuration_dialog.png (100%) rename docs/{OV_Runtime_UG/img => _static/images}/selection_dialog.png (100%) diff --git a/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md b/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md index b48525217f267b..5fdd5a2112ac57 100644 --- a/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md +++ b/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md @@ -1,202 +1,184 @@ # Deploying Your Application with Deployment Manager {#openvino_docs_install_guides_deployment_manager_tool} -The OpenVINO™ Deployment Manager is a Python command-line tool that creates a deployment package by assembling the model, OpenVINO IR files, your application, and associated dependencies into a runtime package for your target device. This tool is delivered within the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows and macOS release packages. It is available in the `/tools/deployment_manager` directory after installation. +@sphinxdirective + +The OpenVINO™ Deployment Manager is a Python command-line tool that creates a deployment package by assembling the model, OpenVINO IR files, your application, and associated dependencies into a runtime package for your target device. This tool is delivered within the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows and macOS release packages. It is available in the ``/tools/deployment_manager`` directory after installation. This article provides instructions on how to create a package with Deployment Manager and then deploy the package to your target systems. -## Prerequisites +Prerequisites +#################### To use the Deployment Manager tool, the following requirements need to be met: -* Intel® Distribution of OpenVINO™ toolkit is installed. See the [Installation Guide](../../install_guides/installing-openvino-overview.md) for instructions on different operating systems. + +* Intel® Distribution of OpenVINO™ toolkit is installed. See the :doc:`Installation Guide ` for instructions on different operating systems. 
* To run inference on a target device other than CPU, device drivers must be pre-installed: - * **For GPU**, see [Configurations for Intel® Processor Graphics (GPU)](../../install_guides/configurations-for-intel-gpu.md). - * **For GNA**, see [Intel® Gaussian & Neural Accelerator (GNA)](../../install_guides/configurations-for-intel-gna.md) -> **IMPORTANT**: The operating system on the target system must be the same as the development system on which you are creating the package. For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04. + * **For GPU**, see :doc:`Configurations for Intel® Processor Graphics (GPU) `. + * **For GNA**, see :doc:`Intel® Gaussian & Neural Accelerator (GNA) ` + +.. important:: + + The operating system on the target system must be the same as the development system on which you are creating the package. For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04. + +.. tip:: -> **TIP**: If your application requires additional dependencies, including the Microsoft Visual C++ Redistributable, use the ['--user_data' option](https://docs.openvino.ai/latest/openvino_docs_install_guides_deployment_manager_tool.html#run-standard-cli-mode) to add them to the deployment archive. Install these dependencies on the target host before running inference. + If your application requires additional dependencies, including the Microsoft Visual C++ Redistributable, use the `'--user_data' option <#running-deployment-manager-in-standard-cli-mode>`__ to add them to the deployment archive. Install these dependencies on the target host before running inference. -## Creating Deployment Package Using Deployment Manager +Creating Deployment Package Using Deployment Manager +#################################################### To create a deployment package that includes inference-related components of OpenVINO™ toolkit, you can run the Deployment Manager tool in either interactive or standard CLI mode . -### Running Deployment Manager in Interactive Mode +Running Deployment Manager in Interactive Mode +++++++++++++++++++++++++++++++++++++++++++++++ -@sphinxdirective +.. dropdown:: Click to expand/collapse -.. raw:: html + The interactive mode provides a user-friendly command-line interface that guides through the process with text prompts. -
+ To launch the Deployment Manager in interactive mode, open a new terminal window, go to the Deployment Manager tool directory, and run the tool script without parameters: -@endsphinxdirective + .. tab:: Linux -The interactive mode provides a user-friendly command-line interface that guides through the process with text prompts. + .. code-block:: sh -To launch the Deployment Manager in interactive mode, open a new terminal window, go to the Deployment Manager tool directory, and run the tool script without parameters: - -@sphinxdirective - -.. tab:: Linux - - .. code-block:: sh - - cd /tools/deployment_manager - - ./deployment_manager.py - -.. tab:: Windows - - .. code-block:: bat - - cd \deployment_tools\tools\deployment_manager - .\deployment_manager.py - -.. tab:: macOS - - .. code-block:: sh - - cd /tools/deployment_manager - ./deployment_manager.py - -@endsphinxdirective + cd /tools/deployment_manager -The target device selection dialog is displayed: - -![Deployment Manager selection dialog](../img/selection_dialog.png) + ./deployment_manager.py -Use the options provided on the screen to complete the selection of the target devices, and press **Enter** to proceed to the package generation dialog. To interrupt the generation process and exit the program, type **q** and press **Enter**. + .. tab:: Windows -Once the selection is accepted, the package generation dialog will appear: - -![Deployment Manager configuration dialog](../img/configuration_dialog.png) + .. code-block:: bat -The target devices selected in the previous step appear on the screen. To go back and change the selection, type **b** and press **Enter**. Use the default settings, or use the following options to configure the generation process: - -* `o. Change output directory` (optional): the path to the output directory. By default, it is set to your home directory. + cd \deployment_tools\tools\deployment_manager + .\deployment_manager.py -* `u. Provide (or change) path to folder with user data` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to `None`, which means that copying the user data to the target system need to be done separately. + .. tab:: macOS -* `t. Change archive name` (optional): the deployment archive name without extension. By default, it is set to `openvino_deployment_package`. - -After all the parameters are set, type **g** and press **Enter** to generate the package for the selected target devices. To interrupt the generation process and exit the program, type **q** and press **Enter**. + .. code-block:: sh -Once the script has successfully completed, the deployment package is generated in the specified output directory. + cd /tools/deployment_manager + ./deployment_manager.py -@sphinxdirective -.. raw:: html + The target device selection dialog is displayed: -
+ .. image:: _static/images/selection_dialog.png + :alt: Deployment Manager selection dialog -@endsphinxdirective + Use the options provided on the screen to complete the selection of the target devices, and press **Enter** to proceed to the package generation dialog. To interrupt the generation process and exit the program, type **q** and press **Enter**. -### Running Deployment Manager in Standard CLI Mode + Once the selection is accepted, the package generation dialog will appear: -@sphinxdirective + .. image:: _static/images/configuration_dialog.png + :alt: Deployment Manager configuration dialog -.. raw:: html + The target devices selected in the previous step appear on the screen. To go back and change the selection, type **b** and press **Enter**. Use the default settings, or use the following options to configure the generation process: -
+ * ``o. Change output directory`` (optional): the path to the output directory. By default, it is set to your home directory. -@endsphinxdirective + * ``u. Provide (or change) path to folder with user data`` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to ``None``, which means that copying the user data to the target system need to be done separately. -You can also run the Deployment Manager tool in the standard CLI mode. In this mode, specify the target devices and other parameters as command-line arguments of the Deployment Manager Python script. This mode facilitates integrating the tool in an automation pipeline. + * ``t. Change archive name`` (optional): the deployment archive name without extension. By default, it is set to ``openvino_deployment_package``. -To launch the Deployment Manager tool in the standard mode: open a new terminal window, go to the Deployment Manager tool directory, and run the tool command with the following syntax: + After all the parameters are set, type **g** and press **Enter** to generate the package for the selected target devices. To interrupt the generation process and exit the program, type **q** and press **Enter**. -@sphinxdirective + Once the script has successfully completed, the deployment package is generated in the specified output directory. -.. tab:: Linux - .. code-block:: sh +Running Deployment Manager in Standard CLI Mode ++++++++++++++++++++++++++++++++++++++++++++++++ - cd /tools/deployment_manager - ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -.. tab:: Windows +.. dropdown:: Click to expand/collapse - .. code-block:: bat + You can also run the Deployment Manager tool in the standard CLI mode. In this mode, specify the target devices and other parameters as command-line arguments of the Deployment Manager Python script. This mode facilitates integrating the tool in an automation pipeline. - cd \tools\deployment_manager - .\deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] + To launch the Deployment Manager tool in the standard mode: open a new terminal window, go to the Deployment Manager tool directory, and run the tool command with the following syntax: -.. tab:: macOS + .. tab:: Linux - .. code-block:: sh + .. code-block:: sh - cd /tools/deployment_manager - ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] + cd /tools/deployment_manager + ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -@endsphinxdirective + .. tab:: Windows -The following options are available: + .. code-block:: bat -* `<--targets>` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, `--targets cpu gpu`. -To get a list of currently available targets, run the program with the `-h` option. + cd \tools\deployment_manager + .\deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -* `[--output_dir]` (optional): the path to the output directory. By default, it is set to your home directory. + .. tab:: macOS -* `[--archive_name]` (optional): a deployment archive name without extension. By default, it is set to `openvino_deployment_package`. + .. code-block:: sh -* `[--user_data]` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) 
files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to `None`, which means copying the user data to the target system need to be performed separately. + cd /tools/deployment_manager + ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -Once the script has successfully completed, the deployment package is generated in the output directory specified. -@sphinxdirective + The following options are available: -.. raw:: html + * ``<--targets>`` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, ``--targets cpu gpu``. + To get a list of currently available targets, run the program with the ``-h`` option. -
+ * ``[--output_dir]`` (optional): the path to the output directory. By default, it is set to your home directory. -@endsphinxdirective + * ``[--archive_name]`` (optional): a deployment archive name without extension. By default, it is set to ``openvino_deployment_package``. + + * ``[--user_data]`` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to ``None``, which means copying the user data to the target system need to be performed separately. -## Deploying Package on Target Systems + Once the script has successfully completed, the deployment package is generated in the output directory specified. -Once the Deployment Manager has successfully completed, the `.tar.gz` (on Linux or macOS) or `.zip` (on Windows) package is generated in the specified output directory. + +Deploying Package on Target Systems +################################### + +Once the Deployment Manager has successfully completed, the ``.tar.gz`` (on Linux or macOS) or ``.zip`` (on Windows) package is generated in the specified output directory. To deploy the OpenVINO Runtime components from the development machine to the target system, perform the following steps: 1. Copy the generated archive to the target system by using your preferred method. -2. Extract the archive to the destination directory on the target system. If the name of your archive is different from the default one shown below, replace `openvino_deployment_package` with your specified name. -@sphinxdirective - -.. tab:: Linux +2. Extract the archive to the destination directory on the target system. If the name of your archive is different from the default one shown below, replace ``openvino_deployment_package`` with your specified name. - .. code-block:: sh + .. tab:: Linux - tar xf openvino_deployment_package.tar.gz -C + .. code-block:: sh -.. tab:: Windows + tar xf openvino_deployment_package.tar.gz -C - .. code-block:: bat + .. tab:: Windows - Use the archiver of your choice to unzip the file. + .. code-block:: bat -.. tab:: macOS + Use the archiver of your choice to unzip the file. - .. code-block:: sh + .. tab:: macOS - tar xf openvino_deployment_package.tar.gz -C + .. code-block:: sh -@endsphinxdirective + tar xf openvino_deployment_package.tar.gz -C Now, the package is extracted to the destination directory. The following files and subdirectories are created: - - * `setupvars.sh` — a copy of `setupvars.sh`. - * `runtime` — contains the OpenVINO runtime binary files. - * `install_dependencies` — a snapshot of the `install_dependencies` directory from the OpenVINO installation directory. - * `` — the directory with the user data (OpenVINO IR, model, dataset, etc.) specified while configuring the package. - -3. On a target Linux system, to run inference install additional dependencies by running the `install_openvino_dependencies.sh` script: - ```sh - cd /openvino/install_dependencies - sudo -E ./install_openvino_dependencies.sh - ``` + + * ``setupvars.sh`` — a copy of ``setupvars.sh``. + * ``runtime`` — contains the OpenVINO runtime binary files. + * ``install_dependencies`` — a snapshot of the ``install_dependencies`` directory from the OpenVINO installation directory. + * ```` — the directory with the user data (OpenVINO IR, model, dataset, etc.) specified while configuring the package. + +3. 
On a target Linux system, to run inference install additional dependencies by running the ``install_openvino_dependencies.sh`` script: + + .. code-block: sh + + cd /openvino/install_dependencies + sudo -E ./install_openvino_dependencies.sh + 4. Set up the environment variables: -@sphinxdirective .. tab:: Linux @@ -219,7 +201,7 @@ To deploy the OpenVINO Runtime components from the development machine to the ta cd /openvino/ source ./setupvars.sh -@endsphinxdirective - Now, you have finished the deployment of the OpenVINO Runtime components to the target system. + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/img/configuration_dialog.png b/docs/_static/images/configuration_dialog.png similarity index 100% rename from docs/OV_Runtime_UG/img/configuration_dialog.png rename to docs/_static/images/configuration_dialog.png diff --git a/docs/OV_Runtime_UG/img/selection_dialog.png b/docs/_static/images/selection_dialog.png similarity index 100% rename from docs/OV_Runtime_UG/img/selection_dialog.png rename to docs/_static/images/selection_dialog.png From 90100451a31a2a81b2cbb1c2a45dc33ea8c3b57a Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:43:44 +0100 Subject: [PATCH 035/296] DOCS shift to rst - Libraries for Local Distribution (#16469) --- .../deployment/local-distribution.md | 196 +++++++++--------- .../images}/deployment_full.svg | 0 2 files changed, 102 insertions(+), 94 deletions(-) rename docs/{img => _static/images}/deployment_full.svg (100%) diff --git a/docs/OV_Runtime_UG/deployment/local-distribution.md b/docs/OV_Runtime_UG/deployment/local-distribution.md index 1b4e02143ad1dc..cd68ac4bdbf085 100644 --- a/docs/OV_Runtime_UG/deployment/local-distribution.md +++ b/docs/OV_Runtime_UG/deployment/local-distribution.md @@ -1,155 +1,163 @@ # Libraries for Local Distribution {#openvino_docs_deploy_local_distribution} +@sphinxdirective + With a local distribution, each C or C++ application/installer will have its own copies of OpenVINO Runtime binaries. However, OpenVINO has a scalable plugin-based architecture, which means that some components can be loaded in runtime only when they are really needed. Therefore, it is important to understand which minimal set of libraries is really needed to deploy the application. This guide helps you to achieve that goal. +Local distribution is also appropriate for OpenVINO binaries built from sources using `Build instructions `__, +but the guide below supposes OpenVINO Runtime is built dynamically. For case of `Static OpenVINO Runtime `__ select the required OpenVINO capabilities on CMake configuration stage using `CMake Options for Custom Compilation `__, the build and link the OpenVINO components into the final application. -Local dsitribution is also appropriate for OpenVINO binaries built from sources using [Build instructions](https://github.com/openvinotoolkit/openvino/wiki#how-to-build), but the guide below supposes OpenVINO Runtime is built dynamically. For case of [Static OpenVINO Runtime](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md) select the required OpenVINO capabilities on CMake configuration stage using [CMake Options for Custom Compilation](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/cmake_options_for_custom_comiplation.md), the build and link the OpenVINO components into the final application. +.. 
note:: -> **NOTE**: The steps below are operating system independent and refer to a library file name without any prefixes (like `lib` on Unix systems) or suffixes (like `.dll` on Windows OS). Do not put `.lib` files on Windows OS to the distribution, because such files are needed only on a linker stage. + The steps below are operating system independent and refer to a library file name without any prefixes (like ``lib`` on Unix systems) or suffixes (like ``.dll`` on Windows OS). Do not put ``.lib`` files on Windows OS to the distribution, because such files are needed only on a linker stage. -## Library Requirements for C++ and C Languages -Independent on the language used to write the application, the `openvino` library must always be put to the final distribution, since it's a core library which orchestrates with all the inference and frontend plugins. In Intel® Distribution of OpenVINO™ toolkit, `openvino` depends on the TBB libraries which are used by OpenVINO Runtime to optimally saturate the devices with computations, so it must be put to the distribution package. +Library Requirements for C++ and C Languages +############################################ -If your application is written with C language, you need to put the `openvino_c` library additionally. +Independent on the language used to write the application, the ``openvino`` library must always be put to the final distribution, since it's a core library which orchestrates with all the inference and frontend plugins. In Intel® Distribution of OpenVINO™ toolkit, ``openvino`` depends on the TBB libraries which are used by OpenVINO Runtime to optimally saturate the devices with computations, so it must be put to the distribution package. -The `plugins.xml` file with information about inference devices must also be taken as a support file for `openvino`. +If your application is written with C language, you need to put the ``openvino_c`` library additionally. +The ``plugins.xml`` file with information about inference devices must also be taken as a support file for ``openvino``. -## Libraries for Pluggable Components + +Libraries for Pluggable Components +################################## The picture below presents dependencies between the OpenVINO Runtime core and pluggable libraries: -![](../../img/deployment_full.svg) +.. image:: _static/images/deployment_full.svg -### Libraries for Compute Devices +Libraries for Compute Devices ++++++++++++++++++++++++++++++ For each inference device, OpenVINO Runtime has its own plugin library: -- `openvino_intel_cpu_plugin` for [Intel® CPU devices](../supported_plugins/CPU.md). -- `openvino_intel_gpu_plugin` for [Intel® GPU devices](../supported_plugins/GPU.md). -- `openvino_intel_gna_plugin` for [Intel® GNA devices](../supported_plugins/GNA.md). -- `openvino_arm_cpu_plugin` for [ARM CPU devices](../supported_plugins/ARM_CPU.md). + +- ``openvino_intel_cpu_plugin`` for :doc:`Intel® CPU devices `. +- ``openvino_intel_gpu_plugin`` for :doc:`Intel® GPU devices `. +- ``openvino_intel_gna_plugin`` for :doc:`Intel® GNA devices `. +- ``openvino_arm_cpu_plugin`` for :doc:`ARM CPU devices `. Depending on what devices are used in the app, the appropriate libraries need to be put to the distribution package. As it is shown on the picture above, some plugin libraries may have OS-specific dependencies which are either backend libraries or additional supports files with firmware, etc. Refer to the table below for details: -@sphinxdirective +.. dropdown:: Windows OS: -.. raw:: html + .. 
list-table:: + :header-rows: 1 -
+ * - Device + - Dependency + * - CPU + - ``-`` + * - GPU + - ``OpenCL.dll``, ``cache.json`` + * - GNA + - ``gna.dll`` + * - Arm® CPU + - ``-`` -@endsphinxdirective -| Device | Dependency | -|-------------|------------| -| CPU | `-` | -| GPU | `OpenCL.dll`, `cache.json` | -| GNA | `gna.dll` | -| Arm® CPU | `-` | +.. dropdown:: Linux OS: -@sphinxdirective + .. list-table:: + :header-rows: 1 -.. raw:: html + * - Device + - Dependency + * - CPU + - ``-`` + * - GPU + - ``libOpenCL.so``, ``cache.json`` + * - GNA + - ``gna.dll`` + * - Arm® CPU + - ``-`` -
-@endsphinxdirective -@sphinxdirective +.. dropdown:: MacOS: -.. raw:: html + .. list-table:: + :header-rows: 1 -
+ * - Device + - Dependency + * - CPU + - ``-`` + * - Arm® CPU + - ``-`` -@endsphinxdirective -| Device | Dependency | -|-------------|-------------| -| CPU | `-` | -| GPU | `libOpenCL.so`, `cache.json` | -| GNA | `gna.dll` | -| Arm® CPU | `-` | +Libraries for Execution Modes ++++++++++++++++++++++++++++++ -@sphinxdirective +The ``HETERO``, ``MULTI``, ``BATCH`` and ``AUTO`` execution modes can also be used explicitly or implicitly by the application. Use the following recommendations to decide whether to put the appropriate libraries to the distribution package: -.. raw:: html +- If :doc:`AUTO ` is used explicitly in the application or `ov::Core::compile_model `__ is used without specifying a device, put ``openvino_auto_plugin`` to the distribution. -
+ .. note:: -@endsphinxdirective -@sphinxdirective - -.. raw:: html + Automatic Device Selection relies on :doc:`inference device plugins `. If you are not sure which inference devices are available on the target system, put all the inference plugin libraries to the distribution. If `ov::device::priorities `__ is used for ``AUTO`` to specify a limited device list, grab the corresponding device plugins only. -
+- If :doc:`MULTI ` is used explicitly, put ``openvino_auto_plugin`` to the distribution. +- If :doc:`HETERO ` is either used explicitly or `ov::hint::performance_mode `__ is used with GPU, put ``openvino_hetero_plugin`` to the distribution. +- If :doc:`BATCH ` is either used explicitly or ``ov::hint::performance_mode`` is used with GPU, put ``openvino_batch_plugin`` to the distribution. -@endsphinxdirective +Frontend Libraries for Reading Models ++++++++++++++++++++++++++++++++++++++ -| Device | Dependency | -|-------------|-------------| -| CPU | `-` | -| Arm® CPU | `-` | - -@sphinxdirective - -.. raw:: html - -
- -@endsphinxdirective - -### Libraries for Execution Modes - -The `HETERO`, `MULTI`, `BATCH` and `AUTO` execution modes can also be used explicitly or implicitly by the application. Use the following recommendation scheme to decide whether to put the appropriate libraries to the distribution package: -- If [AUTO](../auto_device_selection.md) is used explicitly in the application or `ov::Core::compile_model` is used without specifying a device, put `openvino_auto_plugin` to the distribution. - > **NOTE**: Automatic Device Selection relies on [inference device plugins](../supported_plugins/Device_Plugins.md). If you are not sure about what inference devices are available on target system, put all the inference plugin libraries to the distribution. If `ov::device::priorities` is used for `AUTO` to specify a limited device list, grab the corresponding device plugins only. +OpenVINO Runtime uses frontend libraries dynamically to read models in different formats: -- If [MULTI](../multi_device.md) is used explicitly, put `openvino_auto_plugin` to the distribution. -- If [HETERO](../hetero_execution.md) is either used explicitly or `ov::hint::performance_mode` is used with GPU, put `openvino_hetero_plugin` to the distribution. -- If [BATCH](../automatic_batching.md) is either used explicitly or `ov::hint::performance_mode` is used with GPU, put `openvino_batch_plugin` to the distribution. +- ``openvino_ir_frontend`` is used to read OpenVINO IR. +- ``openvino_tensorflow_frontend`` is used to read the TensorFlow file format. +- ``openvino_onnx_frontend`` is used to read the ONNX file format. +- ``openvino_paddle_frontend`` is used to read the Paddle file format. -### Frontend Libraries for Reading Models +Depending on the model formats used by the application in `ov::Core::read_model `__, pick up the appropriate libraries. -OpenVINO Runtime uses frontend libraries dynamically to read models in different formats: -- `openvino_ir_frontend` is used to read OpenVINO IR. -- `openvino_tensorflow_frontend` is used to read TensorFlow file format. -- `openvino_onnx_frontend` is used to read ONNX file format. -- `openvino_paddle_frontend` is used to read Paddle file format. +.. note:: -Depending on the model format types that are used in the application in `ov::Core::read_model`, pick up the appropriate libraries. + To optimize the size of the final distribution package, it is recommended to convert models to OpenVINO IR by using :doc:`Model Optimizer `. This way you don't have to keep TensorFlow, ONNX, PaddlePaddle, and other frontend libraries in the distribution package. -> **NOTE**: To optimize the size of final distribution package, you are recommended to convert models to OpenVINO IR by using [Model Optimizer](../../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). This way you don't have to keep TensorFlow, ONNX, PaddlePaddle, and other frontend libraries in the distribution package. +(Legacy) Preprocessing via G-API +++++++++++++++++++++++++++++++++ -### (Legacy) Preprocessing via G-API +.. note:: -> **NOTE**: [G-API](../../gapi/gapi_intro.md) preprocessing is a legacy functionality, use [preprocessing capabilities from OpenVINO 2.0](../preprocessing_overview.md) which do not require any additional libraries. + :doc:`G-API ` preprocessing is a legacy functionality; use :doc:`preprocessing capabilities from OpenVINO 2.0 `, which do not require any additional libraries. 
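+
+For illustration, a minimal sketch of the OpenVINO 2.0 preprocessing API (the model path and the u8-to-f32 conversion are placeholders chosen for this sketch):
+
+.. code-block:: cpp
+
+   #include <openvino/openvino.hpp>
+
+   int main() {
+       ov::Core core;
+       auto model = core.read_model("model.xml");  // placeholder model path
+       ov::preprocess::PrePostProcessor ppp(model);
+       // The application supplies u8 input tensors...
+       ppp.input().tensor().set_element_type(ov::element::u8);
+       // ...and OpenVINO converts them to the model precision at inference time,
+       // with no extra preprocessing library in the distribution.
+       ppp.input().preprocess().convert_element_type(ov::element::f32);
+       model = ppp.build();
+       return 0;
+   }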
-If the application uses `InferenceEngine::PreProcessInfo::setColorFormat` or `InferenceEngine::PreProcessInfo::setResizeAlgorithm` methods, OpenVINO Runtime dynamically loads `openvino_gapi_preproc` plugin to perform preprocessing via G-API. +If the application uses `InferenceEngine::PreProcessInfo::setColorFormat `__ or `InferenceEngine::PreProcessInfo::setResizeAlgorithm `__ methods, OpenVINO Runtime dynamically loads the ``openvino_gapi_preproc`` plugin to perform preprocessing via G-API. -## Examples +Examples +#################### **CPU + OpenVINO IR in C application** In this example, the application is written in C, performs inference on CPU, and reads models stored in the OpenVINO IR format. The following libraries are used: -- The `openvino_c` library is a main dependency of the application. It links against this library. -- The `openvino` library is used as a private dependency for `openvino_c` and is also used in the deployment. -- `openvino_intel_cpu_plugin` is used for inference. -- `openvino_ir_frontend` is used to read source models. +- The ``openvino_c`` library is the main dependency of the application; the application links against this library. +- The ``openvino`` library is used as a private dependency for ``openvino_c`` and is also used in the deployment. +- ``openvino_intel_cpu_plugin`` is used for inference. +- ``openvino_ir_frontend`` is used to read source models. **MULTI execution on GPU and CPU in `tput` mode** -In this example, the application is written in C++, performs inference [simultaneously on GPU and CPU devices](../multi_device.md) with the `ov::hint::PerformanceMode::THROUGHPUT` property set, and reads models stored in the ONNX format. The following libraries are used: -- The `openvino` library is a main dependency of the application. It links against this library. -- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference. -- `openvino_auto_plugin` is used for Multi-Device Execution. -- `openvino_auto_batch_plugin` can be also put to the distribution to improve the saturation of [Intel® GPU](../supported_plugins/GPU.md) device. If there is no such plugin, [Automatic Batching](../automatic_batching.md) is turned off. -- `openvino_onnx_frontend` is used to read source models. +In this example, the application is written in C++, performs inference :doc:`simultaneously on GPU and CPU devices ` with the `ov::hint::PerformanceMode::THROUGHPUT `__ property set, and reads models stored in the ONNX format. The following libraries are used: + +- The ``openvino`` library is the main dependency of the application; the application links against this library. +- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. +- ``openvino_auto_plugin`` is used for Multi-Device Execution. +- ``openvino_auto_batch_plugin`` can also be put to the distribution to improve the saturation of the :doc:`Intel® GPU ` device. If there is no such plugin, :doc:`Automatic Batching ` is turned off. +- ``openvino_onnx_frontend`` is used to read source models. 
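+
+A minimal sketch of such a configuration (the device names and ONNX model path are illustrative):
+
+.. code-block:: cpp
+
+   #include <openvino/openvino.hpp>
+
+   int main() {
+       ov::Core core;
+       // openvino_onnx_frontend is loaded dynamically here to read the ONNX file.
+       auto model = core.read_model("model.onnx");
+       // MULTI dispatches infer requests to both devices; the THROUGHPUT hint lets
+       // Automatic Batching engage on GPU when openvino_auto_batch_plugin is present.
+       auto compiled = core.compile_model(model, "MULTI:GPU,CPU",
+           ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
+       return 0;
+   }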
**Auto-Device Selection between GPU and CPU** -In this example, the application is written in C++, performs inference with the [Automatic Device Selection](../auto_device_selection.md) mode, limiting device list to GPU and CPU, and reads models [created using C++ code](../model_representation.md). The following libraries are used: -- The `openvino` library is a main dependency of the application. It links against this library. -`openvino_auto_plugin` is used to enable Automatic Device Selection. -- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference. AUTO selects between CPU and GPU devices according to their physical existence on the deployed machine. -- No frontend library is needed because `ov::Model` is created in code. +In this example, the application is written in C++, performs inference with the :doc:`Automatic Device Selection ` mode, limiting the device list to GPU and CPU, and reads models :doc:`created using C++ code `. The following libraries are used: + +- The ``openvino`` library is the main dependency of the application; the application links against this library. +- ``openvino_auto_plugin`` is used to enable Automatic Device Selection. +- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. AUTO selects between CPU and GPU devices according to their availability on the deployed machine. +- No frontend library is needed because ``ov::Model`` is created in code. 
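+
+A possible sketch of this setup (the single-operation model below is only a stand-in for real graph-building code):
+
+.. code-block:: cpp
+
+   #include <openvino/openvino.hpp>
+   #include <openvino/opsets/opset8.hpp>
+
+   int main() {
+       // Build a trivial ov::Model in code, so no frontend library is required.
+       auto param = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 3});
+       auto relu = std::make_shared<ov::opset8::Relu>(param);
+       auto model = std::make_shared<ov::Model>(ov::OutputVector{relu}, ov::ParameterVector{param});
+       ov::Core core;
+       // AUTO limited to GPU and CPU; openvino_auto_plugin plus both device plugins are deployed.
+       auto compiled = core.compile_model(model, "AUTO:GPU,CPU");
+       return 0;
+   }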
+ +@endsphinxdirective diff --git a/docs/img/deployment_full.svg b/docs/_static/images/deployment_full.svg similarity index 100% rename from docs/img/deployment_full.svg rename to docs/_static/images/deployment_full.svg From 57c91e0c5685921fdec4c25ff5c4df38e611ca28 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Wed, 22 Mar 2023 17:28:38 +0800 Subject: [PATCH 036/296] [CPU] Fix issue in reducing HW with small channel size in nspc layout (#16467) --- src/plugins/intel_cpu/src/nodes/reduce.cpp | 36 ++++++++++--------- .../single_layer_tests/reduce_ops.cpp | 33 +++++++++++++++++ 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index a5abc1ead506fb..0eefdf73146389 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2147,22 +2147,24 @@ void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) { } else if (!ReduceC && ReduceD && ReduceH && !ReduceW) { size_t IWB = IW / blk_size; if (ReduceDH_opt) { - // reduce parallelly in D dimension - // step1: !ReduceD && ReduceH && !ReduceW - uint8_t *prc_ptr_n = &vec_reduceDH_prc[0]; - init_dst_data(prc_ptr_n, prc_size); - parallel_for2d(ID, IWB, [&](size_t id, size_t iwb){ - size_t pd = id, pwb = iwb; - reduce_kernel_process(in_ptr_n + (id * IH * IW + iwb * blk_size) * src_data_size, - prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH); - }); - // step2: ReduceD - reduce_stride = PW; - parallel_for(IWB, [&](size_t iwb){ - size_t pwb = iwb, owb = iwb; - reduce_kernel_process(prc_ptr_n + pwb * blk_size * prc_data_size, - out_ptr_n + owb * blk_size * dst_data_size, blk_size, 0, ID); - }); + if (IWB > 0) { + // reduce in parallel along the D dimension + // step1: !ReduceD && ReduceH && !ReduceW + uint8_t *prc_ptr_n = &vec_reduceDH_prc[0]; + init_dst_data(prc_ptr_n, prc_size); + parallel_for2d(ID, IWB, [&](size_t id, size_t iwb){ + size_t pd = id, pwb = iwb; + reduce_kernel_process(in_ptr_n + (id * IH * IW + iwb * blk_size) * src_data_size, + prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH); + }); + // step2: ReduceD + reduce_stride = PW; + parallel_for(IWB, [&](size_t iwb){ + size_t pwb = iwb, owb = iwb; + reduce_kernel_process(prc_ptr_n + pwb * blk_size * prc_data_size, + out_ptr_n + owb * blk_size * dst_data_size, blk_size, 0, ID); + }); + } // reduce tail reduce_stride = IW; size_t tail_start = IWB * blk_size; @@ -2740,7 +2742,7 @@ inline void Reduce::set_reduce_dim_flags() { ReduceH = IH != OH && OH == 1; ReduceW = IW != OW && OW == 1; - // must be done before the above dimension change + // must be done after the above dimension change create_DH_working_memory(); // suit for parallel diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp index 7fa7871b0243ad..f41e74dd4221e5 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp @@ -241,6 +241,10 @@ const std::vector> axes5DFusing = { {0, 2, 4}, }; +const std::vector> axesHW = { + {2, 3} }; + std::vector opTypes = { CommonTestUtils::OpType::SCALAR, CommonTestUtils::OpType::VECTOR, @@ -294,6 +298,11 @@ std::vector> inputShapes_Int32 = { {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 3}}}}, }; +std::vector> inputShapes_SmallChannel = { + {{{}, {{2, 3, 2, 9}}}}, + {{{{1, 5}, 3, {1, 5}, {1, 10}}, {{2, 3, 2, 2}, {2, 3, 2, 9}}}}, +}; + std::vector cpuParams_4D = { CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}), CPUSpecificParams({nchw}, {nchw}, {}, {}), @@ -316,6 +325,10 @@ std::vector cpuParams_HybridLayout_5D = { CPUSpecificParams({ndhwc}, {}, {}, {}) }; +std::vector cpuParams_NHWC_4D = { + CPUSpecificParams({nhwc}, {nhwc}, {}, {}) +}; + const std::vector fusingParamsSet { /* activations */ fusingSwish, @@ -431,6 +444,19 @@ const auto params_Int32 = testing::Combine( testing::Values(emptyCPUSpec), testing::Values(emptyFusingSpec)); +const auto params_NHWC_SmallChannel = testing::Combine( + testing::Combine( + testing::ValuesIn(axesHW), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::Values(true), + testing::ValuesIn(reductionTypes), + testing::ValuesIn(inpOutPrc), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_SmallChannel)), + testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)), + testing::Values(emptyFusingSpec)); + INSTANTIATE_TEST_SUITE_P( smoke_Reduce_OneAxis_CPU, ReduceCPULayerTest, params_OneAxis, ReduceCPULayerTest::getTestCaseName ); Hmm INSTANTIATE blocks elided in source; keeping the surviving text verbatim: @@ -480,6 +506,13 @@ INSTANTIATE_TEST_SUITE_P( ReduceCPULayerTest::getTestCaseName ); +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_NHWC_SmallChannel_CPU, + ReduceCPULayerTest, + params_NHWC_SmallChannel, + ReduceCPULayerTest::getTestCaseName +); + /* ================================ 1.2 No fusion - Logical ================================ */ const auto params_OneAxis_Logical = testing::Combine( testing::Combine( From 1b72352f6f2348de31337b484b2e0830ee61748c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 22 Mar 2023 14:20:03 +0400 Subject: [PATCH 037/296] Fixed CVS-93736 (#16471) --- src/core/src/descriptor/tensor.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/src/descriptor/tensor.cpp b/src/core/src/descriptor/tensor.cpp index 5a7a4c24a69f34..ed87ce606bf21a 100644 --- a/src/core/src/descriptor/tensor.cpp +++ b/src/core/src/descriptor/tensor.cpp @@ -100,10 +100,8 @@ const ov::Shape& ov::descriptor::Tensor::get_shape() const { size_t ov::descriptor::Tensor::size() const { const bool bitwidth_less_than_byte = m_element_type.bitwidth() < 8; - if (bitwidth_less_than_byte) { - return static_cast(ceil((1.0 * shape_size(get_shape()) * m_element_type.bitwidth()) / 8)); - } - return shape_size(get_shape()) * m_element_type.size(); + return bitwidth_less_than_byte ? 
(shape_size(get_shape()) * m_element_type.bitwidth() + 7) >> 3 + : (shape_size(get_shape()) * m_element_type.size()); } const std::unordered_set& ov::descriptor::Tensor::get_names() const { From 8509d0dd82cce761e06b88ce002a419e362aa333 Mon Sep 17 00:00:00 2001 From: Xuejun Zhai Date: Wed, 22 Mar 2023 23:09:14 +0800 Subject: [PATCH 038/296] [Deprecated API] remove `version` (#16426) * [Remove version] Remove version from py openvino Signed-off-by: Zhai, Xuejun * Modify caused by remove version Signed-off-by: Zhai, Xuejun * Fix clang format issue Signed-off-by: Zhai, Xuejun * Revert "Fix clang format issue" This reverts commit 132787286fdcf4865da3480655e328e3fb607249. * Fix CI format issue Signed-off-by: Zhai, Xuejun * Fix CI format issue Signed-off-by: Zhai, Xuejun * Fix merge conflict error Signed-off-by: Zhai, Xuejun --------- Signed-off-by: Zhai, Xuejun --- .../pyngraph/discrete_type_info.cpp | 6 +- .../src/compatibility/pyngraph/node.cpp | 11 ---- .../pyopenvino/graph/discrete_type_info.cpp | 9 +-- .../python/src/pyopenvino/graph/node.cpp | 9 --- .../python/tests/test_graph/test_basic.py | 7 --- .../python/tests/test_graph/test_core.py | 4 +- .../test_ngraph/test_basic.py | 6 -- .../test_ngraph/test_core.py | 4 +- .../low_precision/markup_precisions.hpp | 6 +- .../markup_quantization_granularity.hpp | 6 +- ...avg_pool_precision_preserved_attribute.hpp | 2 +- .../rt_info/intervals_alignment_attribute.hpp | 2 +- .../rt_info/precision_preserved_attribute.hpp | 2 +- .../rt_info/precisions_attribute.hpp | 2 +- .../quantization_alignment_attribute.hpp | 2 +- .../quantization_granularity_attribute.hpp | 2 +- .../rt_info/quantization_mode_attribute.hpp | 2 +- .../rt_info/skip_cleanup_attribute.hpp | 2 +- .../src/markup_precisions.cpp | 8 +-- .../src/markup_quantization_granularity.cpp | 8 +-- .../include/mask_attribute.hpp | 2 +- .../src/pass/common_optimizations.cpp | 2 +- .../include/ov_ops/nms_static_shape_ie.hpp | 7 +-- .../include/ov_ops/type_relaxed.hpp | 5 +- .../tests/utils/compare_functions_test.cpp | 4 +- src/core/include/ngraph/node.hpp | 20 +++--- src/core/include/openvino/core/model.hpp | 2 +- src/core/include/openvino/core/node.hpp | 8 --- src/core/include/openvino/core/rtti.hpp | 38 ++++++----- .../openvino/core/runtime_attribute.hpp | 2 +- src/core/include/openvino/core/type.hpp | 18 +----- src/core/include/openvino/op/acosh.hpp | 2 +- src/core/include/openvino/op/add.hpp | 2 +- src/core/include/openvino/op/asinh.hpp | 2 +- src/core/include/openvino/op/assign.hpp | 4 +- src/core/include/openvino/op/atanh.hpp | 2 +- src/core/include/openvino/op/avg_pool.hpp | 2 +- src/core/include/openvino/op/batch_norm.hpp | 2 +- .../include/openvino/op/batch_to_space.hpp | 2 +- .../openvino/op/binary_convolution.hpp | 2 +- src/core/include/openvino/op/broadcast.hpp | 4 +- src/core/include/openvino/op/bucketize.hpp | 2 +- src/core/include/openvino/op/convert_like.hpp | 2 +- src/core/include/openvino/op/convolution.hpp | 4 +- .../op/ctc_greedy_decoder_seq_len.hpp | 2 +- src/core/include/openvino/op/ctc_loss.hpp | 2 +- .../openvino/op/deformable_convolution.hpp | 2 +- .../openvino/op/deformable_psroi_pooling.hpp | 2 +- src/core/include/openvino/op/dft.hpp | 2 +- src/core/include/openvino/op/divide.hpp | 2 +- src/core/include/openvino/op/einsum.hpp | 2 +- .../openvino/op/embedding_segments_sum.hpp | 2 +- .../openvino/op/embeddingbag_offsets_sum.hpp | 2 +- .../openvino/op/embeddingbag_packedsum.hpp | 2 +- src/core/include/openvino/op/equal.hpp | 2 +- ...xperimental_detectron_detection_output.hpp | 2 
+- ...erimental_detectron_generate_proposals.hpp | 2 +- ...imental_detectron_prior_grid_generator.hpp | 2 +- .../op/experimental_detectron_roi_feature.hpp | 2 +- .../op/experimental_detectron_topkrois.hpp | 2 +- .../openvino/op/extractimagepatches.hpp | 2 +- src/core/include/openvino/op/floor_mod.hpp | 2 +- src/core/include/openvino/op/gather.hpp | 4 +- .../include/openvino/op/gather_elements.hpp | 2 +- src/core/include/openvino/op/gather_nd.hpp | 2 +- src/core/include/openvino/op/gather_tree.hpp | 2 +- src/core/include/openvino/op/gelu.hpp | 4 +- src/core/include/openvino/op/greater.hpp | 2 +- src/core/include/openvino/op/greater_eq.hpp | 2 +- src/core/include/openvino/op/group_conv.hpp | 4 +- src/core/include/openvino/op/gru_cell.hpp | 2 +- src/core/include/openvino/op/gru_sequence.hpp | 2 +- src/core/include/openvino/op/hsigmoid.hpp | 2 +- src/core/include/openvino/op/hswish.hpp | 2 +- src/core/include/openvino/op/idft.hpp | 2 +- src/core/include/openvino/op/interpolate.hpp | 4 +- src/core/include/openvino/op/less.hpp | 2 +- src/core/include/openvino/op/less_eq.hpp | 2 +- src/core/include/openvino/op/log_softmax.hpp | 2 +- src/core/include/openvino/op/logical_and.hpp | 2 +- src/core/include/openvino/op/logical_not.hpp | 2 +- src/core/include/openvino/op/logical_or.hpp | 2 +- src/core/include/openvino/op/logical_xor.hpp | 2 +- src/core/include/openvino/op/loop.hpp | 2 +- src/core/include/openvino/op/lstm_cell.hpp | 2 +- .../include/openvino/op/lstm_sequence.hpp | 2 +- src/core/include/openvino/op/max_pool.hpp | 2 +- src/core/include/openvino/op/maximum.hpp | 2 +- src/core/include/openvino/op/minimum.hpp | 2 +- src/core/include/openvino/op/mish.hpp | 2 +- src/core/include/openvino/op/mod.hpp | 2 +- src/core/include/openvino/op/multiply.hpp | 2 +- src/core/include/openvino/op/mvn.hpp | 2 +- .../openvino/op/non_max_suppression.hpp | 10 +-- src/core/include/openvino/op/non_zero.hpp | 2 +- src/core/include/openvino/op/not_equal.hpp | 2 +- src/core/include/openvino/op/one_hot.hpp | 2 +- src/core/include/openvino/op/op.hpp | 3 +- src/core/include/openvino/op/pad.hpp | 2 +- src/core/include/openvino/op/power.hpp | 2 +- src/core/include/openvino/op/proposal.hpp | 2 +- src/core/include/openvino/op/range.hpp | 2 +- src/core/include/openvino/op/read_value.hpp | 4 +- src/core/include/openvino/op/reduce_l1.hpp | 2 +- src/core/include/openvino/op/reduce_l2.hpp | 2 +- .../openvino/op/reduce_logical_and.hpp | 2 +- .../include/openvino/op/reduce_logical_or.hpp | 2 +- src/core/include/openvino/op/reduce_max.hpp | 2 +- src/core/include/openvino/op/reduce_mean.hpp | 2 +- src/core/include/openvino/op/reduce_min.hpp | 2 +- src/core/include/openvino/op/reduce_prod.hpp | 2 +- src/core/include/openvino/op/reduce_sum.hpp | 2 +- src/core/include/openvino/op/reshape.hpp | 2 +- src/core/include/openvino/op/reverse.hpp | 2 +- src/core/include/openvino/op/rnn_sequence.hpp | 2 +- src/core/include/openvino/op/roi_align.hpp | 2 +- src/core/include/openvino/op/roll.hpp | 2 +- src/core/include/openvino/op/round.hpp | 2 +- .../openvino/op/scatter_elements_update.hpp | 2 +- .../include/openvino/op/scatter_nd_update.hpp | 2 +- .../include/openvino/op/scatter_update.hpp | 2 +- src/core/include/openvino/op/select.hpp | 2 +- src/core/include/openvino/op/shape_of.hpp | 2 +- src/core/include/openvino/op/softmax.hpp | 2 +- src/core/include/openvino/op/softplus.hpp | 2 +- .../include/openvino/op/space_to_batch.hpp | 2 +- src/core/include/openvino/op/split.hpp | 2 +- .../include/openvino/op/strided_slice.hpp | 2 +- 
src/core/include/openvino/op/subtract.hpp | 2 +- src/core/include/openvino/op/swish.hpp | 2 +- src/core/include/openvino/op/topk.hpp | 6 +- src/core/include/openvino/op/transpose.hpp | 2 +- .../include/openvino/op/variadic_split.hpp | 2 +- src/core/src/node.cpp | 10 ++- src/core/src/pass/low_latency.cpp | 2 +- src/core/src/pass/pass.cpp | 2 +- src/core/src/pass/serialize.cpp | 4 +- src/core/src/type.cpp | 32 ++++------ src/core/tests/graph_rewrite.cpp | 10 +-- src/core/tests/opset.cpp | 8 +-- src/core/tests/pass_config.cpp | 12 ++-- src/core/tests/rtti.cpp | 6 +- src/core/tests/type_info.cpp | 63 +++++++++---------- src/core/tests/type_prop/broadcast.cpp | 1 - src/frontends/ir/src/ir_deserializer.cpp | 4 +- src/inference/src/ie_network_reader.cpp | 2 +- .../swap_convert_transpose.cpp | 2 +- src/plugins/intel_cpu/src/nodes/if.cpp | 2 +- .../src/nodes/non_max_suppression.cpp | 2 +- .../include/legacy/ngraph_ops/onehot_ie.hpp | 6 -- .../include/legacy/ngraph_ops/pad_ie.hpp | 6 -- .../convert_ngraph_to_cnn_network_tests.cpp | 8 +-- .../src/transformations/insert_copy_layer.cpp | 10 +-- .../src/transformations/pwl_approximation.cpp | 4 +- src/plugins/intel_gpu/src/plugin/program.cpp | 6 +- .../common_test_utils/graph_comparator.cpp | 8 +-- .../src/utils/ngraph_helpers.cpp | 8 +-- 157 files changed, 274 insertions(+), 385 deletions(-) diff --git a/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp b/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp index b7418def4d3acf..49f8bb97953ebf 100644 --- a/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp @@ -28,14 +28,14 @@ void regclass_pyngraph_DiscreteTypeInfo(py::module m) { discrete_type_info.def(py::self != py::self); discrete_type_info.def_readonly("name", &ngraph::DiscreteTypeInfo::name); - discrete_type_info.def_readonly("version", &ngraph::DiscreteTypeInfo::version); + discrete_type_info.def_readonly("version_id", &ngraph::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ngraph::DiscreteTypeInfo::parent); discrete_type_info.def("__repr__", [](const ngraph::DiscreteTypeInfo& self) { std::string name = std::string(self.name); - std::string version = std::to_string(self.version); + std::string version = std::string(self.version_id); if (self.parent != nullptr) { - std::string parent_version = std::to_string(self.parent->version); + std::string parent_version = std::string(self.parent->version_id); std::string parent_name = self.parent->name; return ""; diff --git a/src/bindings/python/src/compatibility/pyngraph/node.cpp b/src/bindings/python/src/compatibility/pyngraph/node.cpp index 02bfb3d1395630..f696a4297cad7f 100644 --- a/src/bindings/python/src/compatibility/pyngraph/node.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/node.cpp @@ -277,16 +277,6 @@ void regclass_pyngraph_Node(py::module m) { get_rt_info : PyRTMap A dictionary of user defined data. )"); - node.def("get_version", - &ngraph::Node::get_version, - R"( - Returns operation's version of the node. - - Returns - ---------- - get_version : int - Operation version. 
- )"); node.def("set_argument", &ngraph::Node::set_argument); node.def("set_arguments", [](const std::shared_ptr& self, const ngraph::NodeVector& args) { @@ -301,7 +291,6 @@ void regclass_pyngraph_Node(py::module m) { node.def_property_readonly("rt_info", (PyRTMap & (ngraph::Node::*)()) & ngraph::Node::get_rt_info, py::return_value_policy::reference_internal); - node.def_property_readonly("version", &ngraph::Node::get_version); node.def_property_readonly("type_info", &ngraph::Node::get_type_info); node.def_property("friendly_name", &ngraph::Node::get_friendly_name, &ngraph::Node::set_friendly_name); diff --git a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp index dca43473035be7..cd19fa2a121cbc 100644 --- a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp +++ b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp @@ -26,23 +26,18 @@ void regclass_graph_DiscreteTypeInfo(py::module m) { discrete_type_info.def(py::self != py::self); discrete_type_info.def_readonly("name", &ov::DiscreteTypeInfo::name); - discrete_type_info.def_readonly("version", &ov::DiscreteTypeInfo::version); discrete_type_info.def_readonly("version_id", &ov::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ov::DiscreteTypeInfo::parent); - discrete_type_info.def("get_version", []() { - Common::utils::deprecation_warning("get_version()", "2024.0", "Please use version attribute instead."); - return &ov::DiscreteTypeInfo::get_version; - }); discrete_type_info.def("hash", [](const ov::DiscreteTypeInfo& self) { return self.hash(); }); discrete_type_info.def("__repr__", [](const ov::DiscreteTypeInfo& self) { std::string name = std::string(self.name); - std::string version = std::to_string(self.version); + std::string version = std::string(self.version_id); if (self.parent != nullptr) { - std::string parent_version = std::to_string(self.parent->version); + std::string parent_version = std::string(self.parent->version_id); std::string parent_name = self.parent->name; return ""; diff --git a/src/bindings/python/src/pyopenvino/graph/node.cpp b/src/bindings/python/src/pyopenvino/graph/node.cpp index 599194ab9b96f0..1e99bff44210e0 100644 --- a/src/bindings/python/src/pyopenvino/graph/node.cpp +++ b/src/bindings/python/src/pyopenvino/graph/node.cpp @@ -304,14 +304,6 @@ void regclass_graph_Node(py::module m) { :return: A dictionary of user defined data. :rtype: openvino.runtime.RTMap )"); - node.def("get_version", - &ov::Node::get_version, - R"( - Returns operation's version of the node. - - :return: Operation version. 
- :rtype: int - )"); node.def("set_argument", &ov::Node::set_argument); node.def("set_arguments", [](const std::shared_ptr& self, const ov::NodeVector& args) { @@ -326,7 +318,6 @@ void regclass_graph_Node(py::module m) { node.def_property_readonly("rt_info", (PyRTMap & (ov::Node::*)()) & ov::Node::get_rt_info, py::return_value_policy::reference_internal); - node.def_property_readonly("version", &ov::Node::get_version); node.def_property_readonly("type_info", &ov::Node::get_type_info); node.def_property("friendly_name", &ov::Node::get_friendly_name, &ov::Node::set_friendly_name); diff --git a/src/bindings/python/tests/test_graph/test_basic.py b/src/bindings/python/tests/test_graph/test_basic.py index 67bb1f1afad7c5..b4cc21edb27150 100644 --- a/src/bindings/python/tests/test_graph/test_basic.py +++ b/src/bindings/python/tests/test_graph/test_basic.py @@ -537,13 +537,6 @@ def test_sink_function_ctor(): assert function.get_friendly_name() == "TestModel" -def test_node_version(): - node = ops.add([1], [2]) - - assert node.get_version() == 1 - assert node.version == 1 - - def test_strides_iteration_methods(): data = np.array([1, 2, 3]) strides = Strides(data) diff --git a/src/bindings/python/tests/test_graph/test_core.py b/src/bindings/python/tests/test_graph/test_core.py index bd02af0fe69089..57e0d26252eec4 100644 --- a/src/bindings/python/tests/test_graph/test_core.py +++ b/src/bindings/python/tests/test_graph/test_core.py @@ -369,10 +369,10 @@ def test_discrete_type_info(): assert n1.get_type_info().name == "TopK" assert n3.get_type_info().name == "Sin" assert n1.type_info.name == n2.type_info.name - assert n1.type_info.version == n2.type_info.version + assert n1.type_info.version_id == n2.type_info.version_id assert n1.type_info.parent == n2.type_info.parent assert n1.get_type_info().name == n2.get_type_info().name - assert n1.get_type_info().version == n2.get_type_info().version + assert n1.get_type_info().version_id == n2.get_type_info().version_id assert n1.get_type_info().parent == n2.get_type_info().parent assert n1.get_type_info().name != n3.get_type_info().name assert n1.get_type_info().name > n3.get_type_info().name diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py b/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py index de83f6a77089bf..5acc1a29fd2d49 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py @@ -414,9 +414,3 @@ def test_sink_function_ctor(): assert len(function.get_results()) == 1 assert function.get_friendly_name() == "TestFunction" - -def test_node_version(): - node = ng.add([1], [2]) - - assert node.get_version() == 1 - assert node.version == 1 diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_core.py b/src/bindings/python/tests_compatibility/test_ngraph/test_core.py index 9b50732396e5a5..949946ef842bdc 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_core.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_core.py @@ -250,10 +250,10 @@ def test_discrete_type_info(): assert n1.get_type_info().name == "TopK" assert n3.get_type_info().name == "Sin" assert n1.type_info.name == n2.type_info.name - assert n1.type_info.version == n2.type_info.version + assert n1.type_info.version_id == n2.type_info.version_id assert n1.type_info.parent == n2.type_info.parent assert n1.get_type_info().name == n2.get_type_info().name - assert n1.get_type_info().version == n2.get_type_info().version 
+ assert n1.get_type_info().version_id == n2.get_type_info().version_id assert n1.get_type_info().parent == n2.get_type_info().parent assert n1.get_type_info().name != n3.get_type_info().name assert n1.get_type_info().name > n3.get_type_info().name diff --git a/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp b/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp index 783dcafb13f678..97f3bc7569984e 100644 --- a/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp +++ b/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp @@ -39,12 +39,12 @@ class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::Funct class Restriction { public: explicit Restriction(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} - void add(const uint64_t version, const ngraph::pass::low_precision::PrecisionsRestriction::PrecisionsByPorts& precisions) { - precisionsByVersion.emplace(version, precisions); + void add(const std::string version_id, const ngraph::pass::low_precision::PrecisionsRestriction::PrecisionsByPorts& precisions) { + precisionsByVersion.emplace(version_id, precisions); } bool versionIsRequired; - std::unordered_map precisionsByVersion; + std::unordered_map precisionsByVersion; }; OPENVINO_RTTI("MarkupPrecisions", "0"); diff --git a/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp b/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp index 23558ec81a26d5..098da265cee0b4 100644 --- a/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp +++ b/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp @@ -37,12 +37,12 @@ class ngraph::pass::low_precision::MarkupQuantizationGranularity : public ngraph class PerTensorQuantization { public: explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} - void add(const uint64_t version, const std::vector& restrictions) { - portsByVersion.emplace(version, restrictions); + void add(const std::string version_id, const std::vector& restrictions) { + portsByVersion.emplace(version_id, restrictions); } bool versionIsRequired; - std::unordered_map> portsByVersion; + std::unordered_map> portsByVersion; }; OPENVINO_RTTI("MarkupPerTensorQuantization", "0"); diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp index 4064edabf03006..c7c84e2122960c 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp @@ -23,7 +23,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public PrecisionPreservedAttribute { public: - OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute); using PrecisionPreservedAttribute::PrecisionPreservedAttribute; void merge_attributes(std::vector& attributes); bool is_skipped() const; diff --git 
a/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp index dcdf552856062f..99d6e814c2abe0 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp @@ -58,7 +58,7 @@ class LP_TRANSFORMATIONS_API IntervalsAlignmentSharedValue { */ class LP_TRANSFORMATIONS_API IntervalsAlignmentAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::IntervalsAlignment", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::IntervalsAlignment", "", ov::RuntimeAttribute); IntervalsAlignmentAttribute() = default; IntervalsAlignmentAttribute(IntervalsAlignmentSharedValue::Interval combinedInterval, size_t levels); IntervalsAlignmentAttribute( diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp index 13baf73c3eb1b6..f5d14342a8f230 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp @@ -22,7 +22,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::PrecisionPreserved", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::PrecisionPreserved", "", ov::RuntimeAttribute); PrecisionPreservedAttribute() = default; PrecisionPreservedAttribute(const bool value); diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp index 41f88a290b6a7c..0a69000b85e4e1 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp @@ -26,7 +26,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API PrecisionsAttribute : public SharedAttribute> { public: - OPENVINO_RTTI("LowPrecision::Precisions", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::Precisions", "", ov::RuntimeAttribute); PrecisionsAttribute(const std::vector& precisions); static ov::Any create( diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp index 0c766b71885ab5..30daa5f8073082 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp @@ -27,7 +27,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API QuantizationAlignmentAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationAlignment", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationAlignment", "", ov::RuntimeAttribute); QuantizationAlignmentAttribute(const bool value = false); static ov::Any create( diff 
--git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp index 84f2bf474c82a2..df466ef041cf4a 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp @@ -22,7 +22,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API QuantizationGranularityAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute); enum class Granularity { PerChannel, diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp index a3658061510ee0..3c3a454e4b9519 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp @@ -12,7 +12,7 @@ namespace ngraph { class LP_TRANSFORMATIONS_API QuantizationModeAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationModeAttribute", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationModeAttribute", "", ov::RuntimeAttribute); enum class Mode { Asymmetric, diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp index 1a11bbc2983ea2..1b323ee9424794 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp @@ -11,7 +11,7 @@ namespace ngraph { class LP_TRANSFORMATIONS_API SkipCleanupAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::SkipCleanup", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::SkipCleanup", "", ov::RuntimeAttribute); static ov::Any create(const std::shared_ptr& node); }; } // namespace ngraph diff --git a/src/common/low_precision_transformations/src/markup_precisions.cpp b/src/common/low_precision_transformations/src/markup_precisions.cpp index d5c168d4502131..a1748036012f13 100644 --- a/src/common/low_precision_transformations/src/markup_precisions.cpp +++ b/src/common/low_precision_transformations/src/markup_precisions.cpp @@ -30,10 +30,10 @@ ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions( OPENVINO_SUPPRESS_DEPRECATED_START if (it == restrictionsByOperation.end()) { Restriction r(restriction.specifyVersion); - r.precisionsByVersion.emplace(restriction.operationType.version, restriction.precisionsByPorts); + r.precisionsByVersion.emplace(restriction.operationType.version_id, restriction.precisionsByPorts); restrictionsByOperation.emplace(restriction.operationType.name, r); } else { - it->second.add(restriction.operationType.version, restriction.precisionsByPorts); + it->second.add(restriction.operationType.version_id, restriction.precisionsByPorts); } OPENVINO_SUPPRESS_DEPRECATED_END } @@ -108,9 +108,7 @@ bool 
ngraph::pass::low_precision::MarkupPrecisions::run_on_model(const std::shar if (it != restrictionsByOperation.end()) { const Restriction& r = it->second; if (r.versionIsRequired) { - OPENVINO_SUPPRESS_DEPRECATED_START - const auto it2 = r.precisionsByVersion.find(typeInfo.version); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto it2 = r.precisionsByVersion.find(typeInfo.version_id); if (it2 == r.precisionsByVersion.end()) { continue; } diff --git a/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp b/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp index 7b86c1d2203c50..bbe448d83423ec 100644 --- a/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp +++ b/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp @@ -20,10 +20,10 @@ ngraph::pass::low_precision::MarkupQuantizationGranularity::MarkupQuantizationGr OPENVINO_SUPPRESS_DEPRECATED_START if (it == restrictionsByOperation.end()) { PerTensorQuantization r(restriction.specifyVersion); - r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictions); + r.portsByVersion.emplace(restriction.operationType.version_id, restriction.restrictions); restrictionsByOperation.emplace(restriction.operationType.name, r); } else { - it->second.add(restriction.operationType.version, restriction.restrictions); + it->second.add(restriction.operationType.version_id, restriction.restrictions); } OPENVINO_SUPPRESS_DEPRECATED_END } @@ -74,9 +74,7 @@ bool ngraph::pass::low_precision::MarkupQuantizationGranularity::run_on_model(co } if (restriction.versionIsRequired) { - OPENVINO_SUPPRESS_DEPRECATED_START - const auto it2 = restriction.portsByVersion.find(node->get_type_info().version); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto it2 = restriction.portsByVersion.find(node->get_type_info().version_id); if (it2 == restriction.portsByVersion.end()) { continue; } diff --git a/src/common/offline_transformations/include/mask_attribute.hpp b/src/common/offline_transformations/include/mask_attribute.hpp index 33ec516ded9c94..22ad9ba70d054a 100644 --- a/src/common/offline_transformations/include/mask_attribute.hpp +++ b/src/common/offline_transformations/include/mask_attribute.hpp @@ -28,7 +28,7 @@ namespace ngraph { class Mask : public std::vector>, public std::enable_shared_from_this { public: static const ::ov::DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"Mask", 0, "0"}; + static const ::ov::DiscreteTypeInfo type_info_static{"Mask", "0"}; return type_info_static; } diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index 787fb8f650d5be..04278526ce8c34 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -17,7 +17,7 @@ #include "snippets/utils.hpp" #include "snippets/itt.hpp" -NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations", 0); +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations"); namespace ngraph { namespace snippets { diff --git a/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp b/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp index 0c4f38628dce1b..5da183db15a240 100644 --- a/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp +++ 
b/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp @@ -29,14 +29,9 @@ namespace internal { template class NmsStaticShapeIE : public BaseNmsOp { public: - OPENVINO_SUPPRESS_DEPRECATED_START // TODO: it should be std::string("NmsStaticShapeIE_") + BaseNmsOp::get_type_info_static().name, // but currently it does not pass conversion to Legacy Opset correctly - OPENVINO_RTTI(BaseNmsOp::get_type_info_static().name, - "ie_internal_opset", - BaseNmsOp, - BaseNmsOp::get_type_info_static().version); - OPENVINO_SUPPRESS_DEPRECATED_END + OPENVINO_RTTI(BaseNmsOp::get_type_info_static().name, "ie_internal_opset", BaseNmsOp); NmsStaticShapeIE() = default; diff --git a/src/common/transformations/include/ov_ops/type_relaxed.hpp b/src/common/transformations/include/ov_ops/type_relaxed.hpp index 47a791a4b0be42..fa84ab4eb7c945 100644 --- a/src/common/transformations/include/ov_ops/type_relaxed.hpp +++ b/src/common/transformations/include/ov_ops/type_relaxed.hpp @@ -189,10 +189,7 @@ OPENVINO_SUPPRESS_DEPRECATED_START template class TypeRelaxed : public BaseOp, public TypeRelaxedBase { public: - OPENVINO_OP(BaseOp::get_type_info_static().name, - BaseOp::get_type_info_static().version_id, - BaseOp, - BaseOp::get_type_info_static().version); + OPENVINO_OP(BaseOp::get_type_info_static().name, BaseOp::get_type_info_static().version_id, BaseOp); using BaseOp::BaseOp; diff --git a/src/common/transformations/tests/utils/compare_functions_test.cpp b/src/common/transformations/tests/utils/compare_functions_test.cpp index f27b30f18b933b..c89eddb97a4611 100644 --- a/src/common/transformations/tests/utils/compare_functions_test.cpp +++ b/src/common/transformations/tests/utils/compare_functions_test.cpp @@ -201,7 +201,7 @@ TEST(TransformationTests, CompareFunctoinsTINegative) { const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); auto res = fc(f, f_ref); EXPECT_FALSE(res.valid); - EXPECT_THAT(res.message, HasSubstr("LSTMCell/4 != Relu/0")); + EXPECT_THAT(res.message, HasSubstr("LSTMCell/opset4 != Relu/opset1")); } TEST(TransformationTests, CompareFunctoinsTINegativeDifferentElementTypeBetweenSubGraphsInputs) { @@ -514,7 +514,7 @@ class DummyConstant : public ngraph::op::Op { DummyConstant& operator=(const DummyConstant&) = delete; const NodeTypeInfo& get_type_info() const override { - static const NodeTypeInfo type_info{typeid(this).name(), static_cast(0)}; + static const NodeTypeInfo type_info{typeid(this).name(), "0"}; return type_info; } diff --git a/src/core/include/ngraph/node.hpp b/src/core/include/ngraph/node.hpp index 776dcb6bebc653..76090d3cca32b1 100644 --- a/src/core/include/ngraph/node.hpp +++ b/src/core/include/ngraph/node.hpp @@ -150,21 +150,19 @@ using ov::check_new_args_count; } #endif -#define _NGRAPH_RTTI_DEFINITION_WITH_PARENT(CLASS, TYPE_NAME, _VERSION_INDEX, PARENT_CLASS) \ - const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ - static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME, \ - static_cast(_VERSION_INDEX), \ - &PARENT_CLASS::get_type_info_static()}; \ - return type_info_static; \ - } \ - _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) - -#define _NGRAPH_RTTI_DEFINITION_NO_PARENT(CLASS, TYPE_NAME, _VERSION_INDEX) \ +#define _NGRAPH_RTTI_DEFINITION_WITH_PARENT(CLASS, TYPE_NAME, PARENT_CLASS) \ const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ - static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME, static_cast(_VERSION_INDEX)}; \ + static const ::ngraph::Node::type_info_t 
type_info_static{TYPE_NAME, &PARENT_CLASS::get_type_info_static()}; \ return type_info_static; \ } \ _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) + +#define _NGRAPH_RTTI_DEFINITION_NO_PARENT(CLASS, TYPE_NAME) \ + const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ + static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME}; \ + return type_info_static; \ + } \ + _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) #define NGRAPH_RTTI_DEFINITION(...) \ _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ _NGRAPH_RTTI_DEFINITION_WITH_PARENT, \ diff --git a/src/core/include/openvino/core/model.hpp b/src/core/include/openvino/core/model.hpp index 159a2c57533a77..e5d0158e538ff4 100644 --- a/src/core/include/openvino/core/model.hpp +++ b/src/core/include/openvino/core/model.hpp @@ -47,7 +47,7 @@ class OPENVINO_API Model : public std::enable_shared_from_this { public: _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"Model", static_cast(0)}; + static const ::ov::DiscreteTypeInfo type_info_static{"Model"}; return type_info_static; } const ::ov::DiscreteTypeInfo& get_type_info() const { diff --git a/src/core/include/openvino/core/node.hpp b/src/core/include/openvino/core/node.hpp index 549fca57e2c73a..c2bb3fef23404f 100644 --- a/src/core/include/openvino/core/node.hpp +++ b/src/core/include/openvino/core/node.hpp @@ -409,14 +409,6 @@ class OPENVINO_API Node : public std::enable_shared_from_this { /// Get all the nodes that uses the current node NodeVector get_users(bool check_is_used = false) const; - /// \return Version of this node - OPENVINO_DEPRECATED("This method is deprecated and will be removed soon.") - virtual size_t get_version() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return get_type_info().version; - OPENVINO_SUPPRESS_DEPRECATED_END - } - /// Use instance ids for comparison instead of memory addresses to improve determinism bool operator<(const Node& other) const { return m_instance_id < other.m_instance_id; diff --git a/src/core/include/openvino/core/rtti.hpp b/src/core/include/openvino/core/rtti.hpp index 505d6e687eb29b..06b541204d88c3 100644 --- a/src/core/include/openvino/core/rtti.hpp +++ b/src/core/include/openvino/core/rtti.hpp @@ -7,14 +7,14 @@ #include "openvino/core/type.hpp" #include "openvino/core/visibility.hpp" -#define _OPENVINO_RTTI_EXPAND(X) X -#define _OPENVINO_RTTI_DEFINITION_SELECTOR(_1, _2, _3, _4, NAME, ...) NAME +#define _OPENVINO_RTTI_EXPAND(X) X +#define _OPENVINO_RTTI_DEFINITION_SELECTOR(_1, _2, _3, NAME, ...) 
NAME #define _OPENVINO_RTTI_WITH_TYPE(TYPE_NAME) _OPENVINO_RTTI_WITH_TYPE_VERSION(TYPE_NAME, "util") #define _OPENVINO_RTTI_WITH_TYPE_VERSION(TYPE_NAME, VERSION_NAME) \ _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ - static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, 0, VERSION_NAME}; \ + static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, VERSION_NAME}; \ type_info_static.hash(); \ return type_info_static; \ } \ @@ -23,19 +23,18 @@ } #define _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS, 0) + _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) -#define _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS, OLD_VERSION) \ - _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ - static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, \ - OLD_VERSION, \ - VERSION_NAME, \ - &PARENT_CLASS::get_type_info_static()}; \ - type_info_static.hash(); \ - return type_info_static; \ - } \ - const ::ov::DiscreteTypeInfo& get_type_info() const override { \ - return get_type_info_static(); \ +#define _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) \ + _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ + static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, \ + VERSION_NAME, \ + &PARENT_CLASS::get_type_info_static()}; \ + type_info_static.hash(); \ + return type_info_static; \ + } \ + const ::ov::DiscreteTypeInfo& get_type_info() const override { \ + return get_type_info_static(); \ } /// Helper macro that puts necessary declarations of RTTI block inside a class definition. @@ -92,11 +91,10 @@ /// OPENVINO_RTTI(name, version_id) /// OPENVINO_RTTI(name, version_id, parent) /// OPENVINO_RTTI(name, version_id, parent, old_version) -#define OPENVINO_RTTI(...) \ - _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT, \ - _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ - _OPENVINO_RTTI_WITH_TYPE_VERSION, \ +#define OPENVINO_RTTI(...) 
\ + _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ + _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ + _OPENVINO_RTTI_WITH_TYPE_VERSION, \ _OPENVINO_RTTI_WITH_TYPE)(__VA_ARGS__)) /// Note: Please don't use this macros for new operations diff --git a/src/core/include/openvino/core/runtime_attribute.hpp b/src/core/include/openvino/core/runtime_attribute.hpp index 4aca1b00330fdb..5502c3eb8ff280 100644 --- a/src/core/include/openvino/core/runtime_attribute.hpp +++ b/src/core/include/openvino/core/runtime_attribute.hpp @@ -20,7 +20,7 @@ class Any; class OPENVINO_API RuntimeAttribute { public: _OPENVINO_HIDDEN_METHOD static const DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"RuntimeAttribute", static_cast(0)}; + static const ::ov::DiscreteTypeInfo type_info_static{"RuntimeAttribute"}; return type_info_static; } virtual const DiscreteTypeInfo& get_type_info() const { diff --git a/src/core/include/openvino/core/type.hpp b/src/core/include/openvino/core/type.hpp index 6fa3e3d56f6285..6ceaa39cbe08e0 100644 --- a/src/core/include/openvino/core/type.hpp +++ b/src/core/include/openvino/core/type.hpp @@ -30,14 +30,11 @@ namespace ov { */ struct OPENVINO_API DiscreteTypeInfo { const char* name; - OPENVINO_DEPRECATED("This member was deprecated. Please use version_id instead.") - uint64_t version; const char* version_id; // A pointer to a parent type info; used for casting and inheritance traversal, not for // exact type identification const DiscreteTypeInfo* parent; - OPENVINO_SUPPRESS_DEPRECATED_START DiscreteTypeInfo() = default; DiscreteTypeInfo(const DiscreteTypeInfo&) = default; DiscreteTypeInfo(DiscreteTypeInfo&&) = default; @@ -47,29 +44,16 @@ struct OPENVINO_API DiscreteTypeInfo { const char* _version_id, const DiscreteTypeInfo* _parent = nullptr) : name(_name), - version(0), version_id(_version_id), parent(_parent), hash_value(0) {} - constexpr DiscreteTypeInfo(const char* _name, uint64_t _version, const DiscreteTypeInfo* _parent = nullptr) + constexpr DiscreteTypeInfo(const char* _name, const DiscreteTypeInfo* _parent = nullptr) : name(_name), - version(_version), version_id(nullptr), parent(_parent), hash_value(0) {} - constexpr DiscreteTypeInfo(const char* _name, - uint64_t _version, - const char* _version_id, - const DiscreteTypeInfo* _parent = nullptr) - : name(_name), - version(_version), - version_id(_version_id), - parent(_parent), - hash_value(0) {} - OPENVINO_SUPPRESS_DEPRECATED_END - bool is_castable(const DiscreteTypeInfo& target_type) const; std::string get_version() const; diff --git a/src/core/include/openvino/op/acosh.hpp b/src/core/include/openvino/op/acosh.hpp index 97224708df1f50..59a2dd45e4b06c 100644 --- a/src/core/include/openvino/op/acosh.hpp +++ b/src/core/include/openvino/op/acosh.hpp @@ -16,7 +16,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Acosh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Acosh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Acosh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Acosh operation. 
Acosh() = default; diff --git a/src/core/include/openvino/op/add.hpp b/src/core/include/openvino/op/add.hpp index 0e2a812a9dc938..054ec3302d846c 100644 --- a/src/core/include/openvino/op/add.hpp +++ b/src/core/include/openvino/op/add.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Add : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Add", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Add", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs an uninitialized addition operation Add() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/asinh.hpp b/src/core/include/openvino/op/asinh.hpp index 8fa8eedd687dc4..96a7f02bfc7e12 100644 --- a/src/core/include/openvino/op/asinh.hpp +++ b/src/core/include/openvino/op/asinh.hpp @@ -15,7 +15,7 @@ namespace v3 { /// class OPENVINO_API Asinh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Asinh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Asinh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Asinh operation. Asinh() = default; diff --git a/src/core/include/openvino/op/assign.hpp b/src/core/include/openvino/op/assign.hpp index 7c5f2b477a573e..2ba16d46195ffc 100644 --- a/src/core/include/openvino/op/assign.hpp +++ b/src/core/include/openvino/op/assign.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Assign : public util::AssignBase { public: - OPENVINO_OP("Assign", "opset3", util::AssignBase, 3); + OPENVINO_OP("Assign", "opset3", util::AssignBase); Assign() = default; /// \brief Constructs an Assign operation. @@ -44,7 +44,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Assign : public util::AssignBase { public: - OPENVINO_OP("Assign", "opset6", util::AssignBase, 6); + OPENVINO_OP("Assign", "opset6", util::AssignBase); Assign() = default; /// \brief Constructs an Assign operation. diff --git a/src/core/include/openvino/op/atanh.hpp b/src/core/include/openvino/op/atanh.hpp index 5fc62facb560af..4d4220b6fdcaa5 100644 --- a/src/core/include/openvino/op/atanh.hpp +++ b/src/core/include/openvino/op/atanh.hpp @@ -16,7 +16,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Atanh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Atanh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Atanh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Atanh operation. Atanh() = default; diff --git a/src/core/include/openvino/op/avg_pool.hpp b/src/core/include/openvino/op/avg_pool.hpp index a69e6cc8502da0..24298c8602bbd2 100644 --- a/src/core/include/openvino/op/avg_pool.hpp +++ b/src/core/include/openvino/op/avg_pool.hpp @@ -14,7 +14,7 @@ namespace v1 { /// class OPENVINO_API AvgPool : public Op { public: - OPENVINO_OP("AvgPool", "opset1", op::Op, 1); + OPENVINO_OP("AvgPool", "opset1", op::Op); /// \brief Constructs a batched average pooling operation. 
AvgPool() = default; diff --git a/src/core/include/openvino/op/batch_norm.hpp b/src/core/include/openvino/op/batch_norm.hpp index 7131f0078d665d..3800090312cb7f 100644 --- a/src/core/include/openvino/op/batch_norm.hpp +++ b/src/core/include/openvino/op/batch_norm.hpp @@ -59,7 +59,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BatchNormInference : public Op { public: - OPENVINO_OP("BatchNormInference", "opset5", op::Op, 5); + OPENVINO_OP("BatchNormInference", "opset5", op::Op); BatchNormInference() = default; /// \param input [., C, ...] /// \param gamma gamma scaling for normalized value. [C] diff --git a/src/core/include/openvino/op/batch_to_space.hpp b/src/core/include/openvino/op/batch_to_space.hpp index 4d28c16d067229..6609e539087628 100644 --- a/src/core/include/openvino/op/batch_to_space.hpp +++ b/src/core/include/openvino/op/batch_to_space.hpp @@ -23,7 +23,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BatchToSpace : public Op { public: - OPENVINO_OP("BatchToSpace", "opset2", op::Op, 1); + OPENVINO_OP("BatchToSpace", "opset2", op::Op); BatchToSpace() = default; /// \brief Constructs a BatchToSpace operation. /// diff --git a/src/core/include/openvino/op/binary_convolution.hpp b/src/core/include/openvino/op/binary_convolution.hpp index 0dbb2d494bd6eb..8cdcd91e1b03a2 100644 --- a/src/core/include/openvino/op/binary_convolution.hpp +++ b/src/core/include/openvino/op/binary_convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BinaryConvolution : public Op { public: - OPENVINO_OP("BinaryConvolution", "opset1", op::Op, 1); + OPENVINO_OP("BinaryConvolution", "opset1", op::Op); enum class BinaryConvolutionMode { // Interpret input data and kernel values: 0 as -1, 1 as 1 diff --git a/src/core/include/openvino/op/broadcast.hpp b/src/core/include/openvino/op/broadcast.hpp index e41c7e6c601193..fccffc5ed1d22e 100644 --- a/src/core/include/openvino/op/broadcast.hpp +++ b/src/core/include/openvino/op/broadcast.hpp @@ -17,7 +17,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Broadcast : public util::BroadcastBase { public: - OPENVINO_OP("Broadcast", "opset3", op::util::BroadcastBase, 3); + OPENVINO_OP("Broadcast", "opset3", op::util::BroadcastBase); /// \brief Constructs a broadcast operation. Broadcast() = default; @@ -81,7 +81,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Broadcast : public util::BroadcastBase { public: - OPENVINO_OP("Broadcast", "opset1", op::util::BroadcastBase, 1); + OPENVINO_OP("Broadcast", "opset1", op::util::BroadcastBase); /// \brief Constructs a broadcast operation. 
Broadcast() = default; diff --git a/src/core/include/openvino/op/bucketize.hpp b/src/core/include/openvino/op/bucketize.hpp index fa8d34579e7273..33bbfbc8003862 100644 --- a/src/core/include/openvino/op/bucketize.hpp +++ b/src/core/include/openvino/op/bucketize.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Bucketize : public Op { public: - OPENVINO_OP("Bucketize", "opset3", op::Op, 3); + OPENVINO_OP("Bucketize", "opset3", op::Op); Bucketize() = default; /// \brief Constructs a Bucketize node diff --git a/src/core/include/openvino/op/convert_like.hpp b/src/core/include/openvino/op/convert_like.hpp index 27bd89c6e9f106..2621a1ce860131 100644 --- a/src/core/include/openvino/op/convert_like.hpp +++ b/src/core/include/openvino/op/convert_like.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ConvertLike : public Op { public: - OPENVINO_OP("ConvertLike", "opset1", op::Op, 1); + OPENVINO_OP("ConvertLike", "opset1", op::Op); /// \brief Constructs a conversion operation. ConvertLike() = default; diff --git a/src/core/include/openvino/op/convolution.hpp b/src/core/include/openvino/op/convolution.hpp index 87c17f8167c41d..f340fbb544c8c5 100644 --- a/src/core/include/openvino/op/convolution.hpp +++ b/src/core/include/openvino/op/convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Convolution : public Op { public: - OPENVINO_OP("Convolution", "opset1", op::Op, 1); + OPENVINO_OP("Convolution", "opset1", op::Op); /// \brief Constructs a batched convolution operation. Convolution() = default; @@ -129,7 +129,7 @@ class OPENVINO_API Convolution : public Op { /// \ingroup ov_ops_cpp_api class OPENVINO_API ConvolutionBackpropData : public Op { public: - OPENVINO_OP("ConvolutionBackpropData", "opset1", op::Op, 1); + OPENVINO_OP("ConvolutionBackpropData", "opset1", op::Op); /// \brief Constructs a batched-convolution data batch-backprop operation. 
ConvolutionBackpropData() = default; diff --git a/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp b/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp index 4846f750ceaf6b..e07493a781128c 100644 --- a/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp +++ b/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp @@ -14,7 +14,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API CTCGreedyDecoderSeqLen : public Op { public: - OPENVINO_OP("CTCGreedyDecoderSeqLen", "opset6", op::Op, 6); + OPENVINO_OP("CTCGreedyDecoderSeqLen", "opset6", op::Op); CTCGreedyDecoderSeqLen() = default; /// \brief Constructs a CTCGreedyDecoderSeqLen operation /// diff --git a/src/core/include/openvino/op/ctc_loss.hpp b/src/core/include/openvino/op/ctc_loss.hpp index 21f143608fc90f..3a1077d9394e22 100644 --- a/src/core/include/openvino/op/ctc_loss.hpp +++ b/src/core/include/openvino/op/ctc_loss.hpp @@ -14,7 +14,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API CTCLoss : public Op { public: - OPENVINO_OP("CTCLoss", "opset4", op::Op, 4); + OPENVINO_OP("CTCLoss", "opset4", op::Op); CTCLoss() = default; /// \brief Constructs a CTCLoss operation diff --git a/src/core/include/openvino/op/deformable_convolution.hpp b/src/core/include/openvino/op/deformable_convolution.hpp index 3eb673cd340ad6..993c779e5cb2bd 100644 --- a/src/core/include/openvino/op/deformable_convolution.hpp +++ b/src/core/include/openvino/op/deformable_convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DeformableConvolution : public op::util::DeformableConvolutionBase { public: - OPENVINO_OP("DeformableConvolution", "opset1", op::util::DeformableConvolutionBase, 1); + OPENVINO_OP("DeformableConvolution", "opset1", op::util::DeformableConvolutionBase); /// \brief Constructs a conversion operation. DeformableConvolution() = default; diff --git a/src/core/include/openvino/op/deformable_psroi_pooling.hpp b/src/core/include/openvino/op/deformable_psroi_pooling.hpp index 60bc07f3dd2861..3e3315a95b93de 100644 --- a/src/core/include/openvino/op/deformable_psroi_pooling.hpp +++ b/src/core/include/openvino/op/deformable_psroi_pooling.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DeformablePSROIPooling : public Op { public: - OPENVINO_OP("DeformablePSROIPooling", "opset1", op::Op, 1); + OPENVINO_OP("DeformablePSROIPooling", "opset1", op::Op); DeformablePSROIPooling() = default; /// \brief Constructs a DeformablePSROIPooling operation diff --git a/src/core/include/openvino/op/dft.hpp b/src/core/include/openvino/op/dft.hpp index 6072f711650b9c..b87262fd78b919 100644 --- a/src/core/include/openvino/op/dft.hpp +++ b/src/core/include/openvino/op/dft.hpp @@ -29,7 +29,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DFT : public util::FFTBase { public: - OPENVINO_OP("DFT", "opset7", util::FFTBase, 7); + OPENVINO_OP("DFT", "opset7", util::FFTBase); DFT() = default; /// \brief Constructs a DFT operation. DFT is performed for full size axes. 
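The hunks in this patch are one mechanical edit repeated across the opset headers: the deprecated trailing version integer is dropped from OPENVINO_OP, leaving the type name, the opset string, and the parent class. For illustration only, a minimal downstream operation written against the new macro shape; MyCustomOp and its "extension" opset string are hypothetical names, not part of this patch:

    // Sketch, assuming only the public op API.
    #include <openvino/op/op.hpp>

    class MyCustomOp : public ov::op::Op {
    public:
        // Old 4-argument form, removed by this patch:
        //     OPENVINO_OP("MyCustomOp", "extension", ov::op::Op, 1);
        // Surviving 3-argument form:
        OPENVINO_OP("MyCustomOp", "extension", ov::op::Op);

        MyCustomOp() = default;

        std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector&) const override {
            return std::make_shared<MyCustomOp>();
        }
    };
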
diff --git a/src/core/include/openvino/op/divide.hpp b/src/core/include/openvino/op/divide.hpp index c4ef648683c293..4d83d0043f4a64 100644 --- a/src/core/include/openvino/op/divide.hpp +++ b/src/core/include/openvino/op/divide.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Divide : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Divide", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Divide", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs a division operation. Divide() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/einsum.hpp b/src/core/include/openvino/op/einsum.hpp index e42cdc0a94f806..9d1c179c2c5503 100644 --- a/src/core/include/openvino/op/einsum.hpp +++ b/src/core/include/openvino/op/einsum.hpp @@ -13,7 +13,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Einsum : public Op { public: - OPENVINO_OP("Einsum", "opset7", op::Op, 7); + OPENVINO_OP("Einsum", "opset7", op::Op); Einsum() = default; diff --git a/src/core/include/openvino/op/embedding_segments_sum.hpp b/src/core/include/openvino/op/embedding_segments_sum.hpp index 8601bd9ac4ec12..55108f6ccb9d5c 100644 --- a/src/core/include/openvino/op/embedding_segments_sum.hpp +++ b/src/core/include/openvino/op/embedding_segments_sum.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingSegmentsSum : public Op { public: - OPENVINO_OP("EmbeddingSegmentsSum", "opset3", op::Op, 3); + OPENVINO_OP("EmbeddingSegmentsSum", "opset3", op::Op); /// \brief Constructs a EmbeddingSegmentsSum operation. EmbeddingSegmentsSum() = default; /// \brief Constructs a EmbeddingSegmentsSum operation. diff --git a/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp b/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp index 0d88bdbd21af16..7c3ad7a7b74e7e 100644 --- a/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp +++ b/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingBagOffsetsSum : public util::EmbeddingBagOffsetsBase { public: - OPENVINO_OP("EmbeddingBagOffsetsSum", "opset3", util::EmbeddingBagOffsetsBase, 3); + OPENVINO_OP("EmbeddingBagOffsetsSum", "opset3", util::EmbeddingBagOffsetsBase); /// \brief Constructs a EmbeddingBagOffsetsSum operation. EmbeddingBagOffsetsSum() = default; /// \brief Constructs a EmbeddingBagOffsetsSum operation. diff --git a/src/core/include/openvino/op/embeddingbag_packedsum.hpp b/src/core/include/openvino/op/embeddingbag_packedsum.hpp index 169b44484cbeab..b095b226a9e86d 100644 --- a/src/core/include/openvino/op/embeddingbag_packedsum.hpp +++ b/src/core/include/openvino/op/embeddingbag_packedsum.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingBagPackedSum : public util::EmbeddingBagPackedBase { public: - OPENVINO_OP("EmbeddingBagPackedSum", "opset3", util::EmbeddingBagPackedBase, 3); + OPENVINO_OP("EmbeddingBagPackedSum", "opset3", util::EmbeddingBagPackedBase); /// \brief Constructs a EmbeddingBagPackedSum operation. EmbeddingBagPackedSum() = default; /// \brief Constructs a EmbeddingBagPackedSum operation. 
diff --git a/src/core/include/openvino/op/equal.hpp b/src/core/include/openvino/op/equal.hpp index c8e2af9cd575b5..ae16f6c6d52703 100644 --- a/src/core/include/openvino/op/equal.hpp +++ b/src/core/include/openvino/op/equal.hpp @@ -29,7 +29,7 @@ namespace v1 { // clang-format on class OPENVINO_API Equal : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Equal", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Equal", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs an equal operation. Equal() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs an equal operation. diff --git a/src/core/include/openvino/op/experimental_detectron_detection_output.hpp b/src/core/include/openvino/op/experimental_detectron_detection_output.hpp index c8e84bd6f09622..17221d907cb27e 100644 --- a/src/core/include/openvino/op/experimental_detectron_detection_output.hpp +++ b/src/core/include/openvino/op/experimental_detectron_detection_output.hpp @@ -20,7 +20,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronDetectionOutput : public Op { public: - OPENVINO_OP("ExperimentalDetectronDetectionOutput", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronDetectionOutput", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp b/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp index 83bf6f769c73e1..af2bfe1511f9fe 100644 --- a/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp +++ b/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronGenerateProposalsSingleImage : public Op { public: - OPENVINO_OP("ExperimentalDetectronGenerateProposalsSingleImage", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronGenerateProposalsSingleImage", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp b/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp index 0dd697aec211a2..0865654a3dc358 100644 --- a/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp +++ b/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronPriorGridGenerator : public Op { public: - OPENVINO_OP("ExperimentalDetectronPriorGridGenerator", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronPriorGridGenerator", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp b/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp index 109115d08abec9..b6b3d73b47ddbe 100644 --- a/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp +++ b/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp @@ -20,7 +20,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronROIFeatureExtractor : public Op { public: - OPENVINO_OP("ExperimentalDetectronROIFeatureExtractor", "opset6", op::Op, 6); + 
OPENVINO_OP("ExperimentalDetectronROIFeatureExtractor", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_topkrois.hpp b/src/core/include/openvino/op/experimental_detectron_topkrois.hpp index c12e1dcb374fc2..90bb99ebc5c89c 100644 --- a/src/core/include/openvino/op/experimental_detectron_topkrois.hpp +++ b/src/core/include/openvino/op/experimental_detectron_topkrois.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronTopKROIs : public Op { public: - OPENVINO_OP("ExperimentalDetectronTopKROIs", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronTopKROIs", "opset6", op::Op); ExperimentalDetectronTopKROIs() = default; /// \brief Constructs a ExperimentalDetectronTopKROIs operation. diff --git a/src/core/include/openvino/op/extractimagepatches.hpp b/src/core/include/openvino/op/extractimagepatches.hpp index d9a0ca6ff6b89d..37d506fe1895b8 100644 --- a/src/core/include/openvino/op/extractimagepatches.hpp +++ b/src/core/include/openvino/op/extractimagepatches.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExtractImagePatches : public Op { public: - OPENVINO_OP("ExtractImagePatches", "opset3", op::Op, 3); + OPENVINO_OP("ExtractImagePatches", "opset3", op::Op); ExtractImagePatches() = default; /// \brief Constructs a ExtractImagePatches operation diff --git a/src/core/include/openvino/op/floor_mod.hpp b/src/core/include/openvino/op/floor_mod.hpp index 2216de28b617e4..4df54f3bcd7334 100644 --- a/src/core/include/openvino/op/floor_mod.hpp +++ b/src/core/include/openvino/op/floor_mod.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API FloorMod : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("FloorMod", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("FloorMod", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs an uninitialized addition operation FloorMod() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/gather.hpp b/src/core/include/openvino/op/gather.hpp index 55564080af1651..e752259271cfb4 100644 --- a/src/core/include/openvino/op/gather.hpp +++ b/src/core/include/openvino/op/gather.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gather : public op::util::GatherBase { public: - OPENVINO_OP("Gather", "opset1", op::util::GatherBase, 1); + OPENVINO_OP("Gather", "opset1", op::util::GatherBase); static constexpr int64_t AXIS_NOT_SET_VALUE = std::numeric_limits::max(); Gather() = default; /// \param data The tensor from which slices are gathered @@ -33,7 +33,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gather : public op::util::GatherBase { public: - OPENVINO_OP("Gather", "opset7", op::util::GatherBase, 7); + OPENVINO_OP("Gather", "opset7", op::util::GatherBase); Gather() = default; /// \param data The tensor from which slices are gathered diff --git a/src/core/include/openvino/op/gather_elements.hpp b/src/core/include/openvino/op/gather_elements.hpp index c184d86e3628b4..4d8c419e616a10 100644 --- a/src/core/include/openvino/op/gather_elements.hpp +++ b/src/core/include/openvino/op/gather_elements.hpp @@ -14,7 +14,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherElements : public Op { public: - OPENVINO_OP("GatherElements", "opset6", op::Op, 6); + 
OPENVINO_OP("GatherElements", "opset6", op::Op); GatherElements() = default; /// \brief Constructs a GatherElements operation. diff --git a/src/core/include/openvino/op/gather_nd.hpp b/src/core/include/openvino/op/gather_nd.hpp index 146a1511664968..59680a37d4e4c6 100644 --- a/src/core/include/openvino/op/gather_nd.hpp +++ b/src/core/include/openvino/op/gather_nd.hpp @@ -13,7 +13,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherND : public op::util::GatherNDBase { public: - OPENVINO_OP("GatherND", "opset5", op::util::GatherNDBase, 5); + OPENVINO_OP("GatherND", "opset5", op::util::GatherNDBase); GatherND() = default; /// \brief Constructs a GatherND operation. diff --git a/src/core/include/openvino/op/gather_tree.hpp b/src/core/include/openvino/op/gather_tree.hpp index 67f455be8a2aa6..bc2169019a8cea 100644 --- a/src/core/include/openvino/op/gather_tree.hpp +++ b/src/core/include/openvino/op/gather_tree.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherTree : public Op { public: - OPENVINO_OP("GatherTree", "opset1", op::Op, 1); + OPENVINO_OP("GatherTree", "opset1", op::Op); GatherTree() = default; /// \param step_ids Tensor of shape [MAX_TIME, BATCH_SIZE, BEAM_WIDTH] with diff --git a/src/core/include/openvino/op/gelu.hpp b/src/core/include/openvino/op/gelu.hpp index 021a7e0ef8bd02..ae868e3909bbfd 100644 --- a/src/core/include/openvino/op/gelu.hpp +++ b/src/core/include/openvino/op/gelu.hpp @@ -15,7 +15,7 @@ namespace v0 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Gelu", "opset2", util::UnaryElementwiseArithmetic, 0); + OPENVINO_OP("Gelu", "opset2", util::UnaryElementwiseArithmetic); Gelu(); /// \brief Constructs a Gelu operation. @@ -43,7 +43,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Gelu", "opset7", util::UnaryElementwiseArithmetic, 7); + OPENVINO_OP("Gelu", "opset7", util::UnaryElementwiseArithmetic); Gelu() = default; /// \brief Constructs a Gelu operation. diff --git a/src/core/include/openvino/op/greater.hpp b/src/core/include/openvino/op/greater.hpp index 1693c09579b1e7..de889a0acae370 100644 --- a/src/core/include/openvino/op/greater.hpp +++ b/src/core/include/openvino/op/greater.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Greater : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Greater", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Greater", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a greater-than operation. Greater() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a greater-than operation. diff --git a/src/core/include/openvino/op/greater_eq.hpp b/src/core/include/openvino/op/greater_eq.hpp index f4731a2da74a50..1f5fe1f984c95d 100644 --- a/src/core/include/openvino/op/greater_eq.hpp +++ b/src/core/include/openvino/op/greater_eq.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GreaterEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("GreaterEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("GreaterEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a greater-than-or-equal operation. 
GreaterEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a greater-than-or-equal operation. diff --git a/src/core/include/openvino/op/group_conv.hpp b/src/core/include/openvino/op/group_conv.hpp index f01c2a7f3dc891..a37a26e480e8de 100644 --- a/src/core/include/openvino/op/group_conv.hpp +++ b/src/core/include/openvino/op/group_conv.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \brief Batched convolution operation, with optional window dilation and stride. class OPENVINO_API GroupConvolution : public Op { public: - OPENVINO_OP("GroupConvolution", "opset1", op::Op, 1); + OPENVINO_OP("GroupConvolution", "opset1", op::Op); /// \brief Constructs a batched convolution operation. GroupConvolution() = default; @@ -126,7 +126,7 @@ class OPENVINO_API GroupConvolution : public Op { /// \brief Data batch backprop for batched convolution operation. class OPENVINO_API GroupConvolutionBackpropData : public Op { public: - OPENVINO_OP("GroupConvolutionBackpropData", "opset1", op::Op, 1); + OPENVINO_OP("GroupConvolutionBackpropData", "opset1", op::Op); /// \brief Constructs a batched-convolution data batch-backprop operation. GroupConvolutionBackpropData(); diff --git a/src/core/include/openvino/op/gru_cell.hpp b/src/core/include/openvino/op/gru_cell.hpp index 2610c4731ae139..15543eec2b943d 100644 --- a/src/core/include/openvino/op/gru_cell.hpp +++ b/src/core/include/openvino/op/gru_cell.hpp @@ -24,7 +24,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GRUCell : public util::RNNCellBase { public: - OPENVINO_OP("GRUCell", "opset3", op::util::RNNCellBase, 3); + OPENVINO_OP("GRUCell", "opset3", op::util::RNNCellBase); GRUCell(); /// /// \brief Constructs GRUCell node. diff --git a/src/core/include/openvino/op/gru_sequence.hpp b/src/core/include/openvino/op/gru_sequence.hpp index 1fc9e7c9147fcf..fae54509ad0c64 100644 --- a/src/core/include/openvino/op/gru_sequence.hpp +++ b/src/core/include/openvino/op/gru_sequence.hpp @@ -19,7 +19,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GRUSequence : public util::RNNCellBase { public: - OPENVINO_OP("GRUSequence", "opset5", op::Op, 5); + OPENVINO_OP("GRUSequence", "opset5", op::Op); GRUSequence(); GRUSequence(const Output<Node>& X, diff --git a/src/core/include/openvino/op/hsigmoid.hpp b/src/core/include/openvino/op/hsigmoid.hpp index abf8d2e1cf87a9..71b07ada902617 100644 --- a/src/core/include/openvino/op/hsigmoid.hpp +++ b/src/core/include/openvino/op/hsigmoid.hpp @@ -17,7 +17,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API HSigmoid : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("HSigmoid", "opset5", op::util::UnaryElementwiseArithmetic, 5); + OPENVINO_OP("HSigmoid", "opset5", op::util::UnaryElementwiseArithmetic); HSigmoid() = default; /// \brief Constructs a HSigmoid operation. diff --git a/src/core/include/openvino/op/hswish.hpp b/src/core/include/openvino/op/hswish.hpp index b20e3498b90dd0..34cff2955b5ab6 100644 --- a/src/core/include/openvino/op/hswish.hpp +++ b/src/core/include/openvino/op/hswish.hpp @@ -17,7 +17,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API HSwish : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("HSwish", "opset4", op::util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("HSwish", "opset4", op::util::UnaryElementwiseArithmetic); HSwish() = default; /// \brief Constructs a HSwish (hard version of Swish) operation.
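With the numeric field gone from DiscreteTypeInfo (the type.hpp hunk earlier in this patch), only name, version_id and parent identify a type. A rough sketch of the two surviving constructors, with made-up type names; not part of the patch:

    // Sketch against the post-patch openvino/core/type.hpp.
    #include <openvino/core/type.hpp>
    #include <iostream>

    int main() {
        ov::DiscreteTypeInfo a{"MyOp", "opset_a"};  // name + version_id
        ov::DiscreteTypeInfo b{"MyOp", "opset_b"};  // same name, different opset
        std::cout << a.name << " / " << a.version_id << "\n";
        // Identity is decided by name and version_id, no integer involved:
        std::cout << (a == b ? "same" : "different") << "\n";  // prints "different"
    }
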
diff --git a/src/core/include/openvino/op/idft.hpp b/src/core/include/openvino/op/idft.hpp index ea6b0a737a44ba..1fd0948bc9c8a2 100644 --- a/src/core/include/openvino/op/idft.hpp +++ b/src/core/include/openvino/op/idft.hpp @@ -17,7 +17,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API IDFT : public util::FFTBase { public: - OPENVINO_OP("IDFT", "opset7", util::FFTBase, 7); + OPENVINO_OP("IDFT", "opset7", util::FFTBase); IDFT() = default; /// \brief Constructs a IDFT operation. IDFT is performed for full size axes. diff --git a/src/core/include/openvino/op/interpolate.hpp b/src/core/include/openvino/op/interpolate.hpp index cec3a88e3f5f3f..d6e32cc28cb3c2 100644 --- a/src/core/include/openvino/op/interpolate.hpp +++ b/src/core/include/openvino/op/interpolate.hpp @@ -83,7 +83,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Interpolate : public util::InterpolateBase { public: - OPENVINO_OP("Interpolate", "opset4", util::InterpolateBase, 4); + OPENVINO_OP("Interpolate", "opset4", util::InterpolateBase); Interpolate() = default; /// \brief Constructs a Interpolate operation without 'axes' input. @@ -190,7 +190,7 @@ namespace v11 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Interpolate : public util::InterpolateBase { public: - OPENVINO_OP("Interpolate", "opset11", util::InterpolateBase, 11); + OPENVINO_OP("Interpolate", "opset11", util::InterpolateBase); Interpolate() = default; /// \brief Constructs a Interpolate operation without 'axes' input. /// diff --git a/src/core/include/openvino/op/less.hpp b/src/core/include/openvino/op/less.hpp index d11285b871e392..3d87ab9b6ffbe3 100644 --- a/src/core/include/openvino/op/less.hpp +++ b/src/core/include/openvino/op/less.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Less : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Less", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Less", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a less-than operation. Less() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a less-than operation. diff --git a/src/core/include/openvino/op/less_eq.hpp b/src/core/include/openvino/op/less_eq.hpp index 6b725f749c23ae..111f4c07140af5 100644 --- a/src/core/include/openvino/op/less_eq.hpp +++ b/src/core/include/openvino/op/less_eq.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LessEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("LessEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("LessEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a less-than-or-equal operation. LessEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/log_softmax.hpp b/src/core/include/openvino/op/log_softmax.hpp index d441e645998aea..b4bca830a0832f 100644 --- a/src/core/include/openvino/op/log_softmax.hpp +++ b/src/core/include/openvino/op/log_softmax.hpp @@ -14,7 +14,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogSoftmax : public Op { public: - OPENVINO_OP("LogSoftmax", "opset5", op::Op, 5); + OPENVINO_OP("LogSoftmax", "opset5", op::Op); LogSoftmax() = default; /// \brief Constructs a LogSoftmax operation. 
/// diff --git a/src/core/include/openvino/op/logical_and.hpp b/src/core/include/openvino/op/logical_and.hpp index 8580accfe46df7..6d55f8f3585e0f 100644 --- a/src/core/include/openvino/op/logical_and.hpp +++ b/src/core/include/openvino/op/logical_and.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalAnd : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalAnd", "opset1", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalAnd", "opset1", util::BinaryElementwiseLogical); /// \brief Constructs a logical-and operation. LogicalAnd() = default; diff --git a/src/core/include/openvino/op/logical_not.hpp b/src/core/include/openvino/op/logical_not.hpp index 9b50a8e2ecce1e..c5421b8db14a47 100644 --- a/src/core/include/openvino/op/logical_not.hpp +++ b/src/core/include/openvino/op/logical_not.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalNot : public Op { public: - OPENVINO_OP("LogicalNot", "opset1", op::Op, 1); + OPENVINO_OP("LogicalNot", "opset1", op::Op); /// \brief Constructs a logical negation operation. LogicalNot() = default; /// \brief Constructs a logical negation operation. diff --git a/src/core/include/openvino/op/logical_or.hpp b/src/core/include/openvino/op/logical_or.hpp index 379b773d37617f..15c00eea04baf3 100644 --- a/src/core/include/openvino/op/logical_or.hpp +++ b/src/core/include/openvino/op/logical_or.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalOr : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalOr", "opset1", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalOr", "opset1", util::BinaryElementwiseLogical); LogicalOr() = default; /// \brief Constructs a logical-or operation. /// diff --git a/src/core/include/openvino/op/logical_xor.hpp b/src/core/include/openvino/op/logical_xor.hpp index 9e94a1756f98c0..41ad89abca2638 100644 --- a/src/core/include/openvino/op/logical_xor.hpp +++ b/src/core/include/openvino/op/logical_xor.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalXor : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalXor", "opset2", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalXor", "opset2", util::BinaryElementwiseLogical); LogicalXor() = default; /// \brief Constructs a logical-xor operation. /// diff --git a/src/core/include/openvino/op/loop.hpp b/src/core/include/openvino/op/loop.hpp index 7bbc00dc75c7fb..cb174d588b7bc3 100644 --- a/src/core/include/openvino/op/loop.hpp +++ b/src/core/include/openvino/op/loop.hpp @@ -31,7 +31,7 @@ class OPENVINO_API Loop : public op::util::SubGraphOp { int64_t body_condition_output_idx = -1; }; - OPENVINO_OP("Loop", "opset5", op::util::SubGraphOp, 5); + OPENVINO_OP("Loop", "opset5", op::util::SubGraphOp); /// \brief Constructs a Loop operation. 
Loop() = default; diff --git a/src/core/include/openvino/op/lstm_cell.hpp b/src/core/include/openvino/op/lstm_cell.hpp index 3c9e53be10c46b..249b3dccdc2bf6 100644 --- a/src/core/include/openvino/op/lstm_cell.hpp +++ b/src/core/include/openvino/op/lstm_cell.hpp @@ -278,7 +278,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LSTMCell : public util::RNNCellBase { public: - OPENVINO_OP("LSTMCell", "opset4", op::util::RNNCellBase, 4); + OPENVINO_OP("LSTMCell", "opset4", op::util::RNNCellBase); LSTMCell(); /// diff --git a/src/core/include/openvino/op/lstm_sequence.hpp b/src/core/include/openvino/op/lstm_sequence.hpp index 3296bd54208134..1e7599a35ba982 100644 --- a/src/core/include/openvino/op/lstm_sequence.hpp +++ b/src/core/include/openvino/op/lstm_sequence.hpp @@ -127,7 +127,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LSTMSequence : public util::RNNCellBase { public: - OPENVINO_OP("LSTMSequence", "opset5", util::RNNCellBase, 5); + OPENVINO_OP("LSTMSequence", "opset5", util::RNNCellBase); LSTMSequence() = default; using direction = RecurrentSequenceDirection; diff --git a/src/core/include/openvino/op/max_pool.hpp b/src/core/include/openvino/op/max_pool.hpp index 2acfdb9d12b6ce..c1741eef6cb717 100644 --- a/src/core/include/openvino/op/max_pool.hpp +++ b/src/core/include/openvino/op/max_pool.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API MaxPool : public op::util::MaxPoolBase { public: - OPENVINO_OP("MaxPool", "opset1", op::util::MaxPoolBase, 1); + OPENVINO_OP("MaxPool", "opset1", op::util::MaxPoolBase); /// \brief Constructs a batched max pooling operation. MaxPool() = default; diff --git a/src/core/include/openvino/op/maximum.hpp b/src/core/include/openvino/op/maximum.hpp index 5c21463c2ec727..742878b09c4eba 100644 --- a/src/core/include/openvino/op/maximum.hpp +++ b/src/core/include/openvino/op/maximum.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Maximum : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Maximum", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Maximum", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a maximum operation. Maximum() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/minimum.hpp b/src/core/include/openvino/op/minimum.hpp index afd75c41ea577b..c8cfc5c9d7c999 100644 --- a/src/core/include/openvino/op/minimum.hpp +++ b/src/core/include/openvino/op/minimum.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Minimum : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Minimum", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Minimum", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a minimum operation. Minimum() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/mish.hpp b/src/core/include/openvino/op/mish.hpp index 1d3a53ca7534c4..455bd713166f95 100644 --- a/src/core/include/openvino/op/mish.hpp +++ b/src/core/include/openvino/op/mish.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Mish : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Mish", "opset4", util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("Mish", "opset4", util::UnaryElementwiseArithmetic); Mish() = default; /// \brief Constructs an Mish operation. 
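None of these hunks change runtime dispatch; what changes is how code asks which opset an operation belongs to. A check such as get_type_info().version == 1 no longer compiles once the member is removed, so the string version_id is what remains to compare. A sketch, assuming only the public headers:

    // Sketch: opset checks after the removal of the numeric `version` member.
    #include <openvino/core/type.hpp>
    #include <openvino/op/multiply.hpp>
    #include <cstring>
    #include <memory>

    bool is_opset1_multiply(const std::shared_ptr<ov::Node>& node) {
        if (!ov::is_type<ov::op::v1::Multiply>(node))
            return false;
        // Previously: node->get_type_info().version == 1.
        return std::strcmp(node->get_type_info().version_id, "opset1") == 0;
    }
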
diff --git a/src/core/include/openvino/op/mod.hpp b/src/core/include/openvino/op/mod.hpp index 749a7ae2b7e5d4..5e58a2ec03d733 100644 --- a/src/core/include/openvino/op/mod.hpp +++ b/src/core/include/openvino/op/mod.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Mod : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Mod", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Mod", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a Mod node. Mod() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/multiply.hpp b/src/core/include/openvino/op/multiply.hpp index 259c0b9f03a117..2e2f3bd4c73000 100644 --- a/src/core/include/openvino/op/multiply.hpp +++ b/src/core/include/openvino/op/multiply.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Multiply : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Multiply", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Multiply", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs a multiplication operation. Multiply() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/mvn.hpp b/src/core/include/openvino/op/mvn.hpp index c9a3920aedbb4b..7f198ec1444047 100644 --- a/src/core/include/openvino/op/mvn.hpp +++ b/src/core/include/openvino/op/mvn.hpp @@ -99,7 +99,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API MVN : public Op { public: - OPENVINO_OP("MVN", "opset6", op::Op, 6); + OPENVINO_OP("MVN", "opset6", op::Op); MVN() = default; /// \brief Constructs an MVN operation. diff --git a/src/core/include/openvino/op/non_max_suppression.hpp b/src/core/include/openvino/op/non_max_suppression.hpp index 143df579e0ac69..5216c53700684f 100644 --- a/src/core/include/openvino/op/non_max_suppression.hpp +++ b/src/core/include/openvino/op/non_max_suppression.hpp @@ -16,7 +16,7 @@ class OPENVINO_API NonMaxSuppression : public Op { public: enum class BoxEncodingType { CORNER, CENTER }; - OPENVINO_OP("NonMaxSuppression", "opset1", op::Op, 1); + OPENVINO_OP("NonMaxSuppression", "opset1", op::Op); NonMaxSuppression() = default; @@ -86,7 +86,7 @@ class OPENVINO_API NonMaxSuppression : public Op { public: enum class BoxEncodingType { CORNER, CENTER }; - OPENVINO_OP("NonMaxSuppression", "opset3", op::Op, 3); + OPENVINO_OP("NonMaxSuppression", "opset3", op::Op); NonMaxSuppression() = default; /// \brief Constructs a NonMaxSuppression operation. @@ -166,7 +166,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonMaxSuppression : public op::v3::NonMaxSuppression { public: - OPENVINO_OP("NonMaxSuppression", "opset4", op::v3::NonMaxSuppression, 4); + OPENVINO_OP("NonMaxSuppression", "opset4", op::v3::NonMaxSuppression); NonMaxSuppression() = default; /// \brief Constructs a NonMaxSuppression operation. 
@@ -217,7 +217,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonMaxSuppression : public Op { public: - OPENVINO_OP("NonMaxSuppression", "opset5", op::Op, 5); + OPENVINO_OP("NonMaxSuppression", "opset5", op::Op); enum class BoxEncodingType { CORNER, CENTER }; NonMaxSuppression() = default; @@ -365,7 +365,7 @@ namespace v9 { /// class OPENVINO_API NonMaxSuppression : public Op { public: - OPENVINO_OP("NonMaxSuppression", "opset9", op::Op, 9); + OPENVINO_OP("NonMaxSuppression", "opset9", op::Op); enum class BoxEncodingType { CORNER, CENTER }; NonMaxSuppression() = default; diff --git a/src/core/include/openvino/op/non_zero.hpp b/src/core/include/openvino/op/non_zero.hpp index f45ae824a47cd5..e14d757e5ff341 100644 --- a/src/core/include/openvino/op/non_zero.hpp +++ b/src/core/include/openvino/op/non_zero.hpp @@ -20,7 +20,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonZero : public Op { public: - OPENVINO_OP("NonZero", "opset3", op::Op, 3); + OPENVINO_OP("NonZero", "opset3", op::Op); /// \brief Constructs a NonZero operation. NonZero() = default; /// \brief Constructs a NonZero operation. diff --git a/src/core/include/openvino/op/not_equal.hpp b/src/core/include/openvino/op/not_equal.hpp index 930244094d37a6..dfae8b59a8fd0e 100644 --- a/src/core/include/openvino/op/not_equal.hpp +++ b/src/core/include/openvino/op/not_equal.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NotEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("NotEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("NotEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a not-equal operation. NotEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a not-equal operation. diff --git a/src/core/include/openvino/op/one_hot.hpp b/src/core/include/openvino/op/one_hot.hpp index e911d838a52baa..621fd8483c0649 100644 --- a/src/core/include/openvino/op/one_hot.hpp +++ b/src/core/include/openvino/op/one_hot.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API OneHot : public Op { public: - OPENVINO_OP("OneHot", "opset1", op::Op, 1); + OPENVINO_OP("OneHot", "opset1", op::Op); /// \brief Constructs a one-hot operation. OneHot() = default; diff --git a/src/core/include/openvino/op/op.hpp b/src/core/include/openvino/op/op.hpp index c0a57a90323660..7964007180775f 100644 --- a/src/core/include/openvino/op/op.hpp +++ b/src/core/include/openvino/op/op.hpp @@ -16,7 +16,6 @@ #define OPENVINO_OP(...) 
\ _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT, \ _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ _OPENVINO_RTTI_OP_WITH_TYPE_VERSION, \ _OPENVINO_RTTI_OP_WITH_TYPE)(__VA_ARGS__)) \ @@ -40,7 +39,7 @@ class OPENVINO_API Op : public Node { public: _OPENVINO_HIDDEN_METHOD static const ::ov::Node::type_info_t& get_type_info_static() { - static ::ov::Node::type_info_t info{"Op", 0, "util"}; + static ::ov::Node::type_info_t info{"Op", "util"}; info.hash(); return info; } diff --git a/src/core/include/openvino/op/pad.hpp b/src/core/include/openvino/op/pad.hpp index 4f8779f6d16b4b..a45c1f33dd52ef 100644 --- a/src/core/include/openvino/op/pad.hpp +++ b/src/core/include/openvino/op/pad.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Pad : public Op { public: - OPENVINO_OP("Pad", "opset1", op::Op, 1); + OPENVINO_OP("Pad", "opset1", op::Op); /// \brief Constructs a generic padding operation. /// diff --git a/src/core/include/openvino/op/power.hpp b/src/core/include/openvino/op/power.hpp index c2bb08f4154bcf..c89a98c61a203b 100644 --- a/src/core/include/openvino/op/power.hpp +++ b/src/core/include/openvino/op/power.hpp @@ -28,7 +28,7 @@ namespace v1 { // clang-format on class OPENVINO_API Power : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Power", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Power", "opset1", op::util::BinaryElementwiseArithmetic); Power() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/proposal.hpp b/src/core/include/openvino/op/proposal.hpp index 6f7960133d3171..c09282594d3bcb 100644 --- a/src/core/include/openvino/op/proposal.hpp +++ b/src/core/include/openvino/op/proposal.hpp @@ -78,7 +78,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Proposal : public op::v0::Proposal { public: - OPENVINO_OP("Proposal", "opset4", op::Op, 4); + OPENVINO_OP("Proposal", "opset4", op::Op); Proposal() = default; /// \brief Constructs a Proposal operation /// diff --git a/src/core/include/openvino/op/range.hpp b/src/core/include/openvino/op/range.hpp index 1cd44aed49f2de..1222d14874dba0 100644 --- a/src/core/include/openvino/op/range.hpp +++ b/src/core/include/openvino/op/range.hpp @@ -13,7 +13,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Range : public Op { public: - OPENVINO_OP("Range", "opset4", op::Op, 4); + OPENVINO_OP("Range", "opset4", op::Op); /// \brief Constructs an unitialized range operation. Range() = default; diff --git a/src/core/include/openvino/op/read_value.hpp b/src/core/include/openvino/op/read_value.hpp index 87a861b4a1e3bf..38c539427b0c4c 100644 --- a/src/core/include/openvino/op/read_value.hpp +++ b/src/core/include/openvino/op/read_value.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReadValue : public util::ReadValueBase { public: - OPENVINO_OP("ReadValue", "opset3", util::ReadValueBase, 3); + OPENVINO_OP("ReadValue", "opset3", util::ReadValueBase); ReadValue() = default; /// \brief Constructs a ReadValue operation. @@ -45,7 +45,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReadValue : public util::ReadValueBase { public: - OPENVINO_OP("ReadValue", "opset6", util::ReadValueBase, 6); + OPENVINO_OP("ReadValue", "opset6", util::ReadValueBase); ReadValue() = default; /// \brief Constructs a ReadValue operation. 
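The OPENVINO_OP and OPENVINO_RTTI selectors edited in rtti.hpp and op.hpp above dispatch on argument count: the caller's arguments push the candidate macro names rightward and a fixed slot picks the winner, which is why deleting the _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT entry is what retires the 4-argument form. The same trick in isolation, with made-up names:

    // Self-contained demo of macro overloading by arity; names are illustrative.
    #include <cstdio>

    #define PICK_4TH(_1, _2, _3, NAME, ...) NAME
    #define GREET1(a)       std::printf("hi %s\n", a)
    #define GREET2(a, b)    std::printf("hi %s and %s\n", a, b)
    #define GREET3(a, b, c) std::printf("hi %s, %s and %s\n", a, b, c)
    // EXPAND mirrors _OPENVINO_RTTI_EXPAND, a workaround for MSVC's
    // eager __VA_ARGS__ expansion.
    #define EXPAND(x) x
    #define GREET(...) EXPAND(PICK_4TH(__VA_ARGS__, GREET3, GREET2, GREET1)(__VA_ARGS__))

    int main() {
        GREET("a");            // resolves to GREET1
        GREET("a", "b");       // resolves to GREET2
        GREET("a", "b", "c");  // resolves to GREET3
    }
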
diff --git a/src/core/include/openvino/op/reduce_l1.hpp b/src/core/include/openvino/op/reduce_l1.hpp index a9f5024c6aaf06..4889e5c52a6aba 100644 --- a/src/core/include/openvino/op/reduce_l1.hpp +++ b/src/core/include/openvino/op/reduce_l1.hpp @@ -16,7 +16,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceL1 : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceL1", "opset4", util::ArithmeticReductionKeepDims, 4); + OPENVINO_OP("ReduceL1", "opset4", util::ArithmeticReductionKeepDims); /// \brief Constructs a reducet L1-norm operation. ReduceL1() = default; /// \brief Constructs a reduce L1-norm operation. diff --git a/src/core/include/openvino/op/reduce_l2.hpp b/src/core/include/openvino/op/reduce_l2.hpp index c2a18ac3668483..9f9b38b7dc5747 100644 --- a/src/core/include/openvino/op/reduce_l2.hpp +++ b/src/core/include/openvino/op/reduce_l2.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceL2 : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceL2", "opset4", util::ArithmeticReductionKeepDims, 4); + OPENVINO_OP("ReduceL2", "opset4", util::ArithmeticReductionKeepDims); /// \brief Constructs a reducet L2-norm operation. ReduceL2() = default; /// \brief Constructs a reduce L2-norm operation. diff --git a/src/core/include/openvino/op/reduce_logical_and.hpp b/src/core/include/openvino/op/reduce_logical_and.hpp index b7e839ab1069da..1358702a1fd39a 100644 --- a/src/core/include/openvino/op/reduce_logical_and.hpp +++ b/src/core/include/openvino/op/reduce_logical_and.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceLogicalAnd : public util::LogicalReductionKeepDims { public: - OPENVINO_OP("ReduceLogicalAnd", "opset1", util::LogicalReductionKeepDims, 1); + OPENVINO_OP("ReduceLogicalAnd", "opset1", util::LogicalReductionKeepDims); ReduceLogicalAnd() = default; /// \brief Constructs a ReduceLogicalAnd node. /// diff --git a/src/core/include/openvino/op/reduce_logical_or.hpp b/src/core/include/openvino/op/reduce_logical_or.hpp index 67fe065db4585b..36a3fd34759b24 100644 --- a/src/core/include/openvino/op/reduce_logical_or.hpp +++ b/src/core/include/openvino/op/reduce_logical_or.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceLogicalOr : public util::LogicalReductionKeepDims { public: - OPENVINO_OP("ReduceLogicalOr", "opset1", util::LogicalReductionKeepDims, 1); + OPENVINO_OP("ReduceLogicalOr", "opset1", util::LogicalReductionKeepDims); ReduceLogicalOr() = default; /// \brief Constructs a ReduceLogicalOr node. /// diff --git a/src/core/include/openvino/op/reduce_max.hpp b/src/core/include/openvino/op/reduce_max.hpp index b1579ad5ccbf03..499dec82bb9f77 100644 --- a/src/core/include/openvino/op/reduce_max.hpp +++ b/src/core/include/openvino/op/reduce_max.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMax : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMax", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMax", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceMax() = default; /// \brief Constructs a summation operation. 
diff --git a/src/core/include/openvino/op/reduce_mean.hpp b/src/core/include/openvino/op/reduce_mean.hpp index 41459857e081da..7b50dd57b7dafc 100644 --- a/src/core/include/openvino/op/reduce_mean.hpp +++ b/src/core/include/openvino/op/reduce_mean.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMean : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMean", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMean", "opset1", util::ArithmeticReductionKeepDims); ReduceMean() = default; /// \param arg The tensor to be summed. diff --git a/src/core/include/openvino/op/reduce_min.hpp b/src/core/include/openvino/op/reduce_min.hpp index 464b232ed5fbf9..830021a0bb2ae0 100644 --- a/src/core/include/openvino/op/reduce_min.hpp +++ b/src/core/include/openvino/op/reduce_min.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMin : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMin", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMin", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceMin() = default; /// \brief Constructs a summation operation. diff --git a/src/core/include/openvino/op/reduce_prod.hpp b/src/core/include/openvino/op/reduce_prod.hpp index c8697c81bcdc2a..4a9af6339b6797 100644 --- a/src/core/include/openvino/op/reduce_prod.hpp +++ b/src/core/include/openvino/op/reduce_prod.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceProd : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceProd", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceProd", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a product reduction operation. ReduceProd() = default; /// \brief Constructs a product reduction operation. diff --git a/src/core/include/openvino/op/reduce_sum.hpp b/src/core/include/openvino/op/reduce_sum.hpp index 60622fd2b7e3ea..7a3221c68e52ef 100644 --- a/src/core/include/openvino/op/reduce_sum.hpp +++ b/src/core/include/openvino/op/reduce_sum.hpp @@ -61,7 +61,7 @@ namespace v1 { // clang-format on class OPENVINO_API ReduceSum : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceSum", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceSum", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceSum() = default; /// \brief Constructs a summation operation. diff --git a/src/core/include/openvino/op/reshape.hpp b/src/core/include/openvino/op/reshape.hpp index 2905e4ad5ec182..9d4ecc18da1cc0 100644 --- a/src/core/include/openvino/op/reshape.hpp +++ b/src/core/include/openvino/op/reshape.hpp @@ -17,7 +17,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Reshape : public Op { public: - OPENVINO_OP("Reshape", "opset1", op::Op, 1); + OPENVINO_OP("Reshape", "opset1", op::Op); Reshape() = default; /// \brief Constructs a dynamic reshape operation. This operation does not perform /// transpose. 
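Every OPENVINO_OP line touched here expands to a get_type_info_static() definition, and the resulting DiscreteTypeInfo still hashes and compares after losing the integer, so type-keyed lookup tables are unaffected. A sketch, assuming the std::hash<ov::DiscreteTypeInfo> specialization that type.hpp provides:

    // Sketch: DiscreteTypeInfo as a dispatch-table key after this patch.
    #include <openvino/core/type.hpp>
    #include <openvino/op/reduce_mean.hpp>
    #include <openvino/op/reduce_sum.hpp>
    #include <string>
    #include <unordered_map>

    int main() {
        const std::unordered_map<ov::DiscreteTypeInfo, std::string> kind{
            {ov::op::v1::ReduceSum::get_type_info_static(), "sum"},
            {ov::op::v1::ReduceMean::get_type_info_static(), "mean"},
        };
        // hash() now folds name and version_id only; lookups behave as before.
        return kind.count(ov::op::v1::ReduceMean::get_type_info_static()) ? 0 : 1;
    }
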
diff --git a/src/core/include/openvino/op/reverse.hpp b/src/core/include/openvino/op/reverse.hpp index 37266573f7232d..7b1a904aacf201 100644 --- a/src/core/include/openvino/op/reverse.hpp +++ b/src/core/include/openvino/op/reverse.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Reverse : public Op { public: - OPENVINO_OP("Reverse", "opset1", op::Op, 1); + OPENVINO_OP("Reverse", "opset1", op::Op); enum class Mode { INDEX, MASK }; diff --git a/src/core/include/openvino/op/rnn_sequence.hpp b/src/core/include/openvino/op/rnn_sequence.hpp index 44eaf247281a6a..0ee5543687422f 100644 --- a/src/core/include/openvino/op/rnn_sequence.hpp +++ b/src/core/include/openvino/op/rnn_sequence.hpp @@ -18,7 +18,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API RNNSequence : public util::RNNCellBase { public: - OPENVINO_OP("RNNSequence", "opset5", util::RNNCellBase, 4); + OPENVINO_OP("RNNSequence", "opset5", util::RNNCellBase); RNNSequence(); diff --git a/src/core/include/openvino/op/roi_align.hpp b/src/core/include/openvino/op/roi_align.hpp index 2922a7bc001df3..ba1ceefe09d3ed 100644 --- a/src/core/include/openvino/op/roi_align.hpp +++ b/src/core/include/openvino/op/roi_align.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ROIAlign : public Op { public: - OPENVINO_OP("ROIAlign", "opset3", op::Op, 3); + OPENVINO_OP("ROIAlign", "opset3", op::Op); enum class PoolingMode { AVG, MAX }; ROIAlign() = default; diff --git a/src/core/include/openvino/op/roll.hpp b/src/core/include/openvino/op/roll.hpp index 35344dffbad668..844a39b19cfd68 100644 --- a/src/core/include/openvino/op/roll.hpp +++ b/src/core/include/openvino/op/roll.hpp @@ -13,7 +13,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Roll : public Op { public: - OPENVINO_OP("Roll", "opset7", op::Op, 7); + OPENVINO_OP("Roll", "opset7", op::Op); Roll() = default; diff --git a/src/core/include/openvino/op/round.hpp b/src/core/include/openvino/op/round.hpp index 994b5507ca944c..c63cee3738464a 100644 --- a/src/core/include/openvino/op/round.hpp +++ b/src/core/include/openvino/op/round.hpp @@ -18,7 +18,7 @@ namespace v5 { class OPENVINO_API Round : public util::UnaryElementwiseArithmetic { public: enum class RoundMode { HALF_TO_EVEN, HALF_AWAY_FROM_ZERO }; - OPENVINO_OP("Round", "opset5", util::UnaryElementwiseArithmetic, 5); + OPENVINO_OP("Round", "opset5", util::UnaryElementwiseArithmetic); /// \brief Constructs a round operation. 
Round() = default; diff --git a/src/core/include/openvino/op/scatter_elements_update.hpp b/src/core/include/openvino/op/scatter_elements_update.hpp index 903b1fb9bab0cc..4172e99afc50df 100644 --- a/src/core/include/openvino/op/scatter_elements_update.hpp +++ b/src/core/include/openvino/op/scatter_elements_update.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterElementsUpdate : public Op { public: - OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op, 3); + OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op); ScatterElementsUpdate() = default; /// \brief Constructs a ScatterElementsUpdate node diff --git a/src/core/include/openvino/op/scatter_nd_update.hpp b/src/core/include/openvino/op/scatter_nd_update.hpp index 1c7ac4355e1009..ab28cd374dfc6b 100644 --- a/src/core/include/openvino/op/scatter_nd_update.hpp +++ b/src/core/include/openvino/op/scatter_nd_update.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterNDUpdate : public util::ScatterNDBase { public: - OPENVINO_OP("ScatterNDUpdate", "opset4", util::ScatterNDBase, 3); + OPENVINO_OP("ScatterNDUpdate", "opset4", util::ScatterNDBase); ScatterNDUpdate() = default; /// \param inputs Tensor /// \param indices Index tensor: Data type must be `element::i32` or `element::i64` diff --git a/src/core/include/openvino/op/scatter_update.hpp b/src/core/include/openvino/op/scatter_update.hpp index 66936cf2f73654..4e29bf9ab2ac2a 100644 --- a/src/core/include/openvino/op/scatter_update.hpp +++ b/src/core/include/openvino/op/scatter_update.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterUpdate : public util::ScatterBase { public: - OPENVINO_OP("ScatterUpdate", "opset3", util::ScatterBase, 3); + OPENVINO_OP("ScatterUpdate", "opset3", util::ScatterBase); ScatterUpdate() = default; /// /// \brief Constructs ScatterUpdate operator object. diff --git a/src/core/include/openvino/op/select.hpp b/src/core/include/openvino/op/select.hpp index 5d2bec70d6779f..78e8e802ab94d3 100644 --- a/src/core/include/openvino/op/select.hpp +++ b/src/core/include/openvino/op/select.hpp @@ -30,7 +30,7 @@ namespace v1 { // clang-format on class OPENVINO_API Select : public Op { public: - OPENVINO_OP("Select", "opset1", op::Op, 1); + OPENVINO_OP("Select", "opset1", op::Op); /// \brief Constructs a selection operation. Select() : m_auto_broadcast(AutoBroadcastSpec(AutoBroadcastType::NUMPY)) {} diff --git a/src/core/include/openvino/op/shape_of.hpp b/src/core/include/openvino/op/shape_of.hpp index 5e2cace77f9d51..e7ec34c1c87d86 100644 --- a/src/core/include/openvino/op/shape_of.hpp +++ b/src/core/include/openvino/op/shape_of.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ShapeOf : public util::ShapeOfBase { public: - OPENVINO_OP("ShapeOf", "opset3", util::ShapeOfBase, 3); + OPENVINO_OP("ShapeOf", "opset3", util::ShapeOfBase); ShapeOf() = default; /// \brief Constructs a shape-of operation. 
ShapeOf(const Output& arg, const element::Type output_type = element::i64); diff --git a/src/core/include/openvino/op/softmax.hpp b/src/core/include/openvino/op/softmax.hpp index fc9c414df5af02..8a43c6dae7bdef 100644 --- a/src/core/include/openvino/op/softmax.hpp +++ b/src/core/include/openvino/op/softmax.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Softmax : public Op { public: - OPENVINO_OP("Softmax", "opset1", op::Op, 1); + OPENVINO_OP("Softmax", "opset1", op::Op); Softmax() = default; /// \brief Constructs a softmax operation. diff --git a/src/core/include/openvino/op/softplus.hpp b/src/core/include/openvino/op/softplus.hpp index d3358268ac326c..aaff04caa53471 100644 --- a/src/core/include/openvino/op/softplus.hpp +++ b/src/core/include/openvino/op/softplus.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API SoftPlus : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("SoftPlus", "opset4", util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("SoftPlus", "opset4", util::UnaryElementwiseArithmetic); SoftPlus() = default; /// \brief Constructs an SoftPlus operation. diff --git a/src/core/include/openvino/op/space_to_batch.hpp b/src/core/include/openvino/op/space_to_batch.hpp index ceaac33345b2c3..83d47b96ba4c45 100644 --- a/src/core/include/openvino/op/space_to_batch.hpp +++ b/src/core/include/openvino/op/space_to_batch.hpp @@ -23,7 +23,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API SpaceToBatch : public Op { public: - OPENVINO_OP("SpaceToBatch", "opset2", op::Op, 1); + OPENVINO_OP("SpaceToBatch", "opset2", op::Op); SpaceToBatch() = default; diff --git a/src/core/include/openvino/op/split.hpp b/src/core/include/openvino/op/split.hpp index 6a2b70434af549..918457c0d84a05 100644 --- a/src/core/include/openvino/op/split.hpp +++ b/src/core/include/openvino/op/split.hpp @@ -17,7 +17,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Split : public Op { public: - OPENVINO_OP("Split", "opset1", op::Op, 1); + OPENVINO_OP("Split", "opset1", op::Op); /// \brief Constructs a split operation. 
Split() = default; diff --git a/src/core/include/openvino/op/strided_slice.hpp b/src/core/include/openvino/op/strided_slice.hpp index f52b095424c196..5535e2925800c9 100644 --- a/src/core/include/openvino/op/strided_slice.hpp +++ b/src/core/include/openvino/op/strided_slice.hpp @@ -18,7 +18,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API StridedSlice : public Op { public: - OPENVINO_OP("StridedSlice", "opset1", op::Op, 1); + OPENVINO_OP("StridedSlice", "opset1", op::Op); StridedSlice() = default; diff --git a/src/core/include/openvino/op/subtract.hpp b/src/core/include/openvino/op/subtract.hpp index 3c129cb1f9b233..5fd58da3bd6ff5 100644 --- a/src/core/include/openvino/op/subtract.hpp +++ b/src/core/include/openvino/op/subtract.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Subtract : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Subtract", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Subtract", "opset1", util::BinaryElementwiseArithmetic); Subtract() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/swish.hpp b/src/core/include/openvino/op/swish.hpp index 1395e62e409522..bc9935d7f3e88f 100644 --- a/src/core/include/openvino/op/swish.hpp +++ b/src/core/include/openvino/op/swish.hpp @@ -16,7 +16,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Swish : public Op { public: - OPENVINO_OP("Swish", "opset4", op::Op, 4); + OPENVINO_OP("Swish", "opset4", op::Op); Swish() = default; /// \brief Constructs an Swish operation. diff --git a/src/core/include/openvino/op/topk.hpp b/src/core/include/openvino/op/topk.hpp index 2af15c7baefb92..9c2ec7a9ce1492 100644 --- a/src/core/include/openvino/op/topk.hpp +++ b/src/core/include/openvino/op/topk.hpp @@ -18,7 +18,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset1", op::util::TopKBase, 1); + OPENVINO_OP("TopK", "opset1", op::util::TopKBase); using SortType = TopKSortType; using Mode = TopKMode; @@ -69,7 +69,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset3", op::util::TopKBase, 3); + OPENVINO_OP("TopK", "opset3", op::util::TopKBase); /// \brief Constructs a TopK operation TopK() = default; /// \brief Constructs a TopK operation with two outputs: values and indices. @@ -111,7 +111,7 @@ namespace v11 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset11", op::util::TopKBase, 11); + OPENVINO_OP("TopK", "opset11", op::util::TopKBase); /// \brief Constructs a TopK operation TopK() = default; /// \brief Constructs a TopK operation with two outputs: values and indices. 
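All of the header hunks above make the same mechanical change: the trailing numeric version argument is dropped from OPENVINO_OP, leaving the opset string (version_id) as the only version marker, so coexisting revisions such as TopK v1/v3/v11 are now told apart purely by their "opset1"/"opset3"/"opset11" strings. A minimal Python sketch of the user-visible effect, assuming the pyopenvino bindings that expose DiscreteTypeInfo.name and DiscreteTypeInfo.version_id:

import openvino.runtime.opset10 as ops

param = ops.parameter([1, 3, 224, 224], name="data")
node = ops.shape_of(param)

# Type identity is now (name, version_id); there is no numeric version field.
info = node.get_type_info()
print(info.name)        # "ShapeOf"
print(info.version_id)  # "opset3" - the string passed to OPENVINO_OP above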
diff --git a/src/core/include/openvino/op/transpose.hpp b/src/core/include/openvino/op/transpose.hpp index 341906128d3273..2b4af853893270 100644 --- a/src/core/include/openvino/op/transpose.hpp +++ b/src/core/include/openvino/op/transpose.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Transpose : public Op { public: - OPENVINO_OP("Transpose", "opset1", op::Op, 1); + OPENVINO_OP("Transpose", "opset1", op::Op); Transpose() = default; /// diff --git a/src/core/include/openvino/op/variadic_split.hpp b/src/core/include/openvino/op/variadic_split.hpp index 8c5034cf031cc9..2d6f751d48d3ba 100644 --- a/src/core/include/openvino/op/variadic_split.hpp +++ b/src/core/include/openvino/op/variadic_split.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API VariadicSplit : public Op { public: - OPENVINO_OP("VariadicSplit", "opset1", op::Op, 1); + OPENVINO_OP("VariadicSplit", "opset1", op::Op); /// \brief Constructs a variadic split operation. VariadicSplit() = default; diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 70b18e710f46e3..2a9baaeb90d144 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -385,9 +385,13 @@ std::ostream& ov::Node::write_description(std::ostream& out, uint32_t depth) con if (depth == 0) { out << get_friendly_name(); } else { - OPENVINO_SUPPRESS_DEPRECATED_START - out << "v" << get_type_info().version << "::" << get_type_info().name << " " << get_friendly_name() << " ("; - OPENVINO_SUPPRESS_DEPRECATED_END + auto version = get_type_info().version_id; + if (version) + out << "v" << version << "::" << get_type_info().name << " " << get_friendly_name() << " ("; + else + out << "v" + << " " + << "::" << get_type_info().name << " " << get_friendly_name() << " ("; string sep = ""; for (const auto& arg : input_values()) { out << sep << arg; diff --git a/src/core/src/pass/low_latency.cpp b/src/core/src/pass/low_latency.cpp index 455adc024cd382..5364b08fd8a88e 100644 --- a/src/core/src/pass/low_latency.cpp +++ b/src/core/src/pass/low_latency.cpp @@ -18,7 +18,7 @@ #include NGRAPH_SUPPRESS_DEPRECATED_START -NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency"); using namespace std; diff --git a/src/core/src/pass/pass.cpp b/src/core/src/pass/pass.cpp index 01f24f05fba1d3..6b246959ac3f13 100644 --- a/src/core/src/pass/pass.cpp +++ b/src/core/src/pass/pass.cpp @@ -57,6 +57,6 @@ ov::pass::ModelPass::~ModelPass() = default; OPENVINO_SUPPRESS_DEPRECATED_START -NGRAPH_RTTI_DEFINITION(ngraph::pass::NodePass, "ngraph::pass::NodePass", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::NodePass, "ngraph::pass::NodePass"); ngraph::pass::NodePass::~NodePass() = default; diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index 81d2618e122084..dc7f634488bf5d 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -1002,11 +1002,9 @@ void ngfunction_2_ir(pugi::xml_node& netXml, // WA for LSTMCellv0, peephole input shall not be serialized if (e.to_port == 6) { const auto& type_info = ordered_ops[e.to_layer]->get_type_info(); - OPENVINO_SUPPRESS_DEPRECATED_START - if (!strcmp(type_info.name, "LSTMCell") && type_info.version == 0) { + if (!strcmp(type_info.name, "LSTMCell")) { continue; } - OPENVINO_SUPPRESS_DEPRECATED_END } pugi::xml_node edge = edges.append_child("edge"); edge.append_attribute("from-layer").set_value(e.from_layer); diff --git a/src/core/src/type.cpp b/src/core/src/type.cpp 
index 3def4c0adde5f2..be48b68f1feb40 100644 --- a/src/core/src/type.cpp +++ b/src/core/src/type.cpp @@ -18,12 +18,9 @@ size_t DiscreteTypeInfo::hash() const { if (hash_value != 0) return hash_value; size_t name_hash = name ? std::hash<std::string>()(std::string(name)) : 0; - OPENVINO_SUPPRESS_DEPRECATED_START - size_t version_hash = std::hash<uint64_t>()(version); - OPENVINO_SUPPRESS_DEPRECATED_END size_t version_id_hash = version_id ? std::hash<std::string>()(std::string(version_id)) : 0; - return ov::util::hash_combine(std::vector<size_t>{name_hash, version_hash, version_id_hash}); + return ov::util::hash_combine(std::vector<size_t>{name_hash, version_id_hash}); } size_t DiscreteTypeInfo::hash() { @@ -40,9 +37,7 @@ std::string DiscreteTypeInfo::get_version() const { if (version_id) { return std::string(version_id); } - OPENVINO_SUPPRESS_DEPRECATED_START - return std::to_string(version); - OPENVINO_SUPPRESS_DEPRECATED_END + return {}; } DiscreteTypeInfo::operator std::string() const { @@ -51,10 +46,7 @@ DiscreteTypeInfo::operator std::string() const { std::ostream& operator<<(std::ostream& s, const DiscreteTypeInfo& info) { std::string version_id = info.version_id ? info.version_id : "(empty)"; - OPENVINO_SUPPRESS_DEPRECATED_START - s << "DiscreteTypeInfo{name: " << info.name << ", version_id: " << version_id << ", old_version: " << info.version - << ", parent: "; - OPENVINO_SUPPRESS_DEPRECATED_END + s << "DiscreteTypeInfo{name: " << info.name << ", version_id: " << version_id << ", parent: "; if (!info.parent) s << info.parent; else @@ -66,10 +58,7 @@ std::ostream& operator<<(std::ostream& s, const DiscreteTypeInfo& info) { // parent is commented to fix type relaxed operations bool DiscreteTypeInfo::operator<(const DiscreteTypeInfo& b) const { - OPENVINO_SUPPRESS_DEPRECATED_START - if (version < b.version) - return true; - if (version == b.version && name != nullptr && b.name != nullptr) { + if (name != nullptr && b.name != nullptr) { int cmp_status = strcmp(name, b.name); if (cmp_status < 0) return true; @@ -81,15 +70,20 @@ bool DiscreteTypeInfo::operator<(const DiscreteTypeInfo& b) const { } } - OPENVINO_SUPPRESS_DEPRECATED_END return false; } bool DiscreteTypeInfo::operator==(const DiscreteTypeInfo& b) const { if (hash_value != 0 && b.hash_value != 0) return hash() == b.hash(); - OPENVINO_SUPPRESS_DEPRECATED_START - return version == b.version && strcmp(name, b.name) == 0; - OPENVINO_SUPPRESS_DEPRECATED_END + if (name != nullptr && b.name != nullptr) { + if (strcmp(name, b.name) == 0) { + std::string v_id(version_id == nullptr ?
"" : b.version_id); + if (v_id == bv_id) + return true; + } + } + return false; } bool DiscreteTypeInfo::operator<=(const DiscreteTypeInfo& b) const { return *this == b || *this < b; diff --git a/src/core/tests/graph_rewrite.cpp b/src/core/tests/graph_rewrite.cpp index c7fb67d243c4c3..d85146b1ffd2d0 100644 --- a/src/core/tests/graph_rewrite.cpp +++ b/src/core/tests/graph_rewrite.cpp @@ -55,9 +55,9 @@ class Anchor : public ngraph::pass::GraphRewrite { Anchor() : GraphRewrite() {} }; -NGRAPH_RTTI_DEFINITION(TestPass, "TestPass", 0); -NGRAPH_RTTI_DEFINITION(Anchor, "Anchor", 0); -NGRAPH_RTTI_DEFINITION(GatherNodesPass, "GatherNodesPass", 0); +NGRAPH_RTTI_DEFINITION(TestPass, "TestPass"); +NGRAPH_RTTI_DEFINITION(Anchor, "Anchor"); +NGRAPH_RTTI_DEFINITION(GatherNodesPass, "GatherNodesPass"); std::shared_ptr get_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -165,7 +165,7 @@ class PrivateDivide : public ngraph::opset3::Divide { using ngraph::opset3::Divide::Divide; }; -NGRAPH_RTTI_DEFINITION(PrivateDivide, "PrivateDivide", 0, ngraph::opset3::Divide); +NGRAPH_RTTI_DEFINITION(PrivateDivide, "PrivateDivide", ngraph::opset3::Divide); std::shared_ptr get_derived_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -434,7 +434,7 @@ class CheckConsumers : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(CheckConsumers, "CheckConsumers", 0); +NGRAPH_RTTI_DEFINITION(CheckConsumers, "CheckConsumers"); TEST(GraphRewriteTest, nodes_use_count) { auto f = get_function(); diff --git a/src/core/tests/opset.cpp b/src/core/tests/opset.cpp index cfc5cb7250b41a..289ae1188cea53 100644 --- a/src/core/tests/opset.cpp +++ b/src/core/tests/opset.cpp @@ -72,7 +72,7 @@ INSTANTIATE_TEST_SUITE_P(opset, class MyOpOld : public ov::op::Op { public: - static constexpr ov::DiscreteTypeInfo type_info{"MyOpOld", static_cast(0)}; + static constexpr ov::DiscreteTypeInfo type_info{"MyOpOld"}; const ov::DiscreteTypeInfo& get_type_info() const override { return type_info; } @@ -121,7 +121,7 @@ TEST(opset, custom_opset) { opset.insert(); opset.insert(); EXPECT_EQ(opset.get_types_info().size(), 3); - EXPECT_TRUE(opset.contains_type("MyOpNewFromOld")); - EXPECT_TRUE(opset.contains_type("MyOpNew")); - EXPECT_TRUE(opset.contains_type("MyOpIncorrect")); + EXPECT_TRUE(opset.contains_type(std::string("MyOpNewFromOld"))); + EXPECT_TRUE(opset.contains_type(std::string("MyOpNew"))); + EXPECT_TRUE(opset.contains_type(std::string("MyOpIncorrect"))); } diff --git a/src/core/tests/pass_config.cpp b/src/core/tests/pass_config.cpp index 3c9395aedc2bd2..e2c0d15e943eae 100644 --- a/src/core/tests/pass_config.cpp +++ b/src/core/tests/pass_config.cpp @@ -32,7 +32,7 @@ class RenameReLU : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(RenameReLU, "RenameReLU", 0); +NGRAPH_RTTI_DEFINITION(RenameReLU, "RenameReLU"); class RenameSigmoid : public ngraph::pass::MatcherPass { public: @@ -50,7 +50,7 @@ class RenameSigmoid : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(RenameSigmoid, "RenameSigmoid", 0); +NGRAPH_RTTI_DEFINITION(RenameSigmoid, "RenameSigmoid"); class TestFunctionPass : public ngraph::pass::FunctionPass { public: @@ -67,7 +67,7 @@ class TestFunctionPass : public ngraph::pass::FunctionPass { } }; -NGRAPH_RTTI_DEFINITION(TestFunctionPass, "TestFunctionPass", 0); +NGRAPH_RTTI_DEFINITION(TestFunctionPass, "TestFunctionPass"); class TestGraphRewritePass : public ngraph::pass::GraphRewrite { public: @@ -78,7 +78,7 @@ class 
TestGraphRewritePass : public ngraph::pass::GraphRewrite { } }; -NGRAPH_RTTI_DEFINITION(TestGraphRewritePass, "TestGraphRewritePass", 0); +NGRAPH_RTTI_DEFINITION(TestGraphRewritePass, "TestGraphRewritePass"); std::tuple, std::shared_ptr, std::shared_ptr> get_test_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -289,7 +289,7 @@ class TestNestedMatcher : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(TestNestedMatcher, "TestNestedMatcher", 0); +NGRAPH_RTTI_DEFINITION(TestNestedMatcher, "TestNestedMatcher"); class TestNestedGraphRewrite : public pass::GraphRewrite { public: @@ -299,7 +299,7 @@ class TestNestedGraphRewrite : public pass::GraphRewrite { } }; -NGRAPH_RTTI_DEFINITION(TestNestedGraphRewrite, "TestNestedGraphRewrite", 0); +NGRAPH_RTTI_DEFINITION(TestNestedGraphRewrite, "TestNestedGraphRewrite"); TEST(PassConfig, EnableDisablePasses10) { std::shared_ptr f; diff --git a/src/core/tests/rtti.cpp b/src/core/tests/rtti.cpp index bb0362f52a366f..e5a1619e75d3ce 100644 --- a/src/core/tests/rtti.cpp +++ b/src/core/tests/rtti.cpp @@ -42,7 +42,7 @@ class OpTypeVersionParent : public OpType { class OpTypeVersionParentOld : public OpType { public: - OPENVINO_OP("OpTypeVersionParentOld", "my_version1", OpType, 1); + OPENVINO_OP("OpTypeVersionParentOld", "my_version1", OpType); OpTypeVersionParentOld() = default; std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override { @@ -56,7 +56,6 @@ TEST(rtti, op_with_type) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpType::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpType"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "extension"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, ngraph::op::Op::get_type_info_static()); @@ -67,7 +66,6 @@ TEST(rtti, op_with_type_version) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpTypeVersion::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersion"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, ngraph::op::Op::get_type_info_static()); @@ -78,7 +76,6 @@ TEST(rtti, op_with_type_version_parent) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpTypeVersionParent::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersionParent"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, OpType::get_type_info_static()); @@ -90,7 +87,6 @@ TEST(rtti, op_with_type_version_parent_old) { ASSERT_EQ(type_info, OpTypeVersionParentOld::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersionParentOld"), 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version1"), 0); - ASSERT_EQ(type_info.version, 1); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, OpType::get_type_info_static()); } diff --git a/src/core/tests/type_info.cpp b/src/core/tests/type_info.cpp index 1beac34b3055c2..98c1f9c8e146ed 100644 --- a/src/core/tests/type_info.cpp +++ b/src/core/tests/type_info.cpp @@ -10,11 +10,11 @@ OPENVINO_SUPPRESS_DEPRECATED_START TEST(type_info, compare_old_type) { - ov::DiscreteTypeInfo type1("type1", static_cast(0)); - ov::DiscreteTypeInfo type2("type2", static_cast(0)); - ov::DiscreteTypeInfo type3("type1", 1ul); - ov::DiscreteTypeInfo type4("type3", 
static_cast(0), &type1); - ov::DiscreteTypeInfo type5("type3", static_cast(0), &type2); + ov::DiscreteTypeInfo type1("type1"); + ov::DiscreteTypeInfo type2("type2"); + ov::DiscreteTypeInfo type3("type1"); + ov::DiscreteTypeInfo type4("type3", &type1); + ov::DiscreteTypeInfo type5("type3", &type2); ASSERT_TRUE(type1 != type2); ASSERT_TRUE(type1 == type1); ASSERT_TRUE(type1 < type2); @@ -46,40 +46,37 @@ TEST(type_info, compare_new_type) { } TEST(type_info, compare_new_with_old_type) { - ov::DiscreteTypeInfo type1("type1", static_cast(0), "version1"); - ov::DiscreteTypeInfo type1_o("type1", static_cast(0)); + ov::DiscreteTypeInfo type1("type1", "version1"); + ov::DiscreteTypeInfo type1_o("type1", "version1"); ASSERT_TRUE(type1 == type1_o); } TEST(type_info, check_hash_value) { - const auto& hash_val = [](const char* name, const char* version_id, uint64_t version) -> size_t { + const auto& hash_val = [](const char* name, const char* version_id) -> size_t { size_t name_hash = name ? std::hash()(std::string(name)) : 0; - size_t version_hash = std::hash()(version); size_t version_id_hash = version_id ? std::hash()(std::string(version_id)) : 0; // don't use parent for hash calculation, it is not a part of type (yet) - return ov::util::hash_combine(std::vector{name_hash, version_hash, version_id_hash}); + return ov::util::hash_combine(std::vector{name_hash, version_id_hash}); }; - ov::DiscreteTypeInfo type("type1", 0, "version1"); - ov::DiscreteTypeInfo type_old("type1", 1); - ov::DiscreteTypeInfo type_with_version("type1", 1, "version1"); - ov::DiscreteTypeInfo type_empty_name("", static_cast(0)); - ov::DiscreteTypeInfo type_empty_ver("type", static_cast(0), ""); - EXPECT_EQ(hash_val(type.name, type.version_id, type.version), type.hash()); - EXPECT_EQ(hash_val(type_old.name, type_old.version_id, type_old.version), type_old.hash()); - EXPECT_EQ(hash_val(type_with_version.name, type_with_version.version_id, type_with_version.version), - type_with_version.hash()); - EXPECT_EQ(hash_val(type_empty_name.name, type_empty_name.version_id, type_empty_name.version), - type_empty_name.hash()); - EXPECT_EQ(hash_val(type_empty_ver.name, type_empty_ver.version_id, type_empty_ver.version), type_empty_ver.hash()); + ov::DiscreteTypeInfo type("type1", "version1"); + ov::DiscreteTypeInfo type_old("type1"); + ov::DiscreteTypeInfo type_with_version("type1", "version1"); + ov::DiscreteTypeInfo type_empty_name(""); + ov::DiscreteTypeInfo type_empty_ver("type", ""); + EXPECT_EQ(hash_val(type.name, type.version_id), type.hash()); + EXPECT_EQ(hash_val(type_old.name, type_old.version_id), type_old.hash()); + EXPECT_EQ(hash_val(type_with_version.name, type_with_version.version_id), type_with_version.hash()); + EXPECT_EQ(hash_val(type_empty_name.name, type_empty_name.version_id), type_empty_name.hash()); + EXPECT_EQ(hash_val(type_empty_ver.name, type_empty_ver.version_id), type_empty_ver.hash()); } TEST(type_info, find_in_map) { std::vector vector_names; - ov::DiscreteTypeInfo a("Mod", 1ul, "opset1"); - ov::DiscreteTypeInfo b("Prelu", static_cast(0), "opset1"); - ov::DiscreteTypeInfo c("Vector", static_cast(0)); - ov::DiscreteTypeInfo d("Mod", 1ul, "opset3"); - ov::DiscreteTypeInfo f("Mod", 2ul); + ov::DiscreteTypeInfo a("Mod", "opset1"); + ov::DiscreteTypeInfo b("Prelu", "opset1"); + ov::DiscreteTypeInfo c("Vector"); + ov::DiscreteTypeInfo d("Mod", "opset3"); + ov::DiscreteTypeInfo f("Mod", "opset2"); std::map test_map; test_map[a] = 1; @@ -94,20 +91,20 @@ TEST(type_info, find_in_map) { test_map[type] = 2; std::string name 
= type.name; vector_names.emplace_back(name); - ov::DiscreteTypeInfo t(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t2(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t2(vector_names.rbegin()->c_str()); test_map[t] = 3; test_map[t2] = 4; std::string name1 = "a" + name; vector_names.emplace_back(name1); - ov::DiscreteTypeInfo t3(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t4(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t3(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t4(vector_names.rbegin()->c_str()); test_map[t3] = 5; test_map[t4] = 6; std::string name2 = name + "z"; vector_names.emplace_back(name2); - ov::DiscreteTypeInfo t5(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t6(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t5(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t6(vector_names.rbegin()->c_str()); test_map[t5] = 7; test_map[t6] = 8; } diff --git a/src/core/tests/type_prop/broadcast.cpp b/src/core/tests/type_prop/broadcast.cpp index e4a82aefb31285..e587512b4e7a70 100644 --- a/src/core/tests/type_prop/broadcast.cpp +++ b/src/core/tests/type_prop/broadcast.cpp @@ -613,7 +613,6 @@ TEST(type_prop, broadcast_v3_bidirectional_mode_string) { const auto broadcast_v3 = make_shared(arg, shape, "BIDIRECTIONAL"); ASSERT_EQ(broadcast_v3->get_broadcast_spec(), op::BroadcastType::BIDIRECTIONAL); - ASSERT_EQ(broadcast_v3->get_version(), 3); } TEST(type_prop, broadcast_v3_shape_unexpected_axes_mapping_input) { diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 72eaeb9b07acf4..0a4ece0aae5e63 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -771,7 +771,7 @@ std::shared_ptr XmlDeserializer::create_node( const std::string& type_name = translate_type_name(params.type); std::shared_ptr ngraphNode; - ov::DiscreteTypeInfo type(type_name.c_str(), 0, params.version.c_str()); + ov::DiscreteTypeInfo type(type_name.c_str(), params.version.c_str()); auto extensionIt = m_extensions.find(type); if (extensionIt != m_extensions.end()) { @@ -885,7 +885,7 @@ std::shared_ptr XmlDeserializer::create_node( item.print(ss); IE_THROW() << "rt_info attribute: " << attribute_name << " has no \"version\" field: " << ss.str(); } - const auto& type_info = ov::DiscreteTypeInfo(attribute_name.c_str(), 0, attribute_version.c_str()); + const auto& type_info = ov::DiscreteTypeInfo(attribute_name.c_str(), attribute_version.c_str()); auto attr = attrs_factory.create_by_type_info(type_info); if (!attr.empty()) { if (attr.is()) { diff --git a/src/inference/src/ie_network_reader.cpp b/src/inference/src/ie_network_reader.cpp index 6baf232df80f57..16bfdc7d883f98 100644 --- a/src/inference/src/ie_network_reader.cpp +++ b/src/inference/src/ie_network_reader.cpp @@ -47,7 +47,7 @@ class ExtensionWrapper : public ov::LegacyOpExtension { : m_ext(ext), m_opset_name(opset), m_type(name), - m_ext_type(m_type.c_str(), 0, m_opset_name.c_str()) {} + m_ext_type(m_type.c_str(), m_opset_name.c_str()) {} const ov::DiscreteTypeInfo& get_type_info() const override { return m_ext_type; diff --git a/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp b/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp index f03806a7adee38..8a836b46f9c186 100755 --- 
a/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp +++ b/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp @@ -10,7 +10,7 @@ #include "itt.hpp" -NGRAPH_RTTI_DEFINITION(ov::intel_cpu::SwapConvertTranspose, "SwapConvertTranspose", 0); +NGRAPH_RTTI_DEFINITION(ov::intel_cpu::SwapConvertTranspose, "SwapConvertTranspose"); ov::intel_cpu::SwapConvertTranspose::SwapConvertTranspose() { MATCHER_SCOPE(SwapConvertTranspose); diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index b1f93795cf874c..84856a3e6a90ef 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -48,7 +48,7 @@ void If::PortMapHelper::redefineTo() { bool If::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v8::If::get_type_info_static())) { - errorMessage = "Not supported If operation version " + std::to_string(op->get_type_info().version) + + errorMessage = "Not supported If operation version " + std::string(op->get_type_info().version_id) + " with name '" + op->get_friendly_name() + "'. Node If supports only opset8 version."; return false; } diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index 780e6eb4607f36..67400590ce40a4 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -600,7 +600,7 @@ NonMaxSuppression::NonMaxSuppression(const std::shared_ptr& op, co sortResultDescending = nmsIe->m_sort_result_descending; } else { const auto &typeInfo = op->get_type_info(); - IE_THROW() << errorPrefix << " doesn't support NMS: " << typeInfo.name << " v" << typeInfo.version; + IE_THROW() << errorPrefix << " doesn't support NMS: " << typeInfo.name << " v" << typeInfo.version_id; } const auto &boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims(); diff --git a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp index e295e25e06ec6c..e06f8c5a528adc 100644 --- a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp +++ b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp @@ -32,12 +32,6 @@ class ngraph::op::OneHotIE : public Op { float off_value, element::Type type); - OPENVINO_SUPPRESS_DEPRECATED_START - size_t get_version() const override { - return 1; - } - OPENVINO_SUPPRESS_DEPRECATED_END - void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool visit_attributes(AttributeVisitor& visitor) override; diff --git a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp index 4f6b98c27bef73..076280819e3ad3 100644 --- a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp +++ b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp @@ -29,12 +29,6 @@ class PadIE : public Op { Shape output_shape, float pad_value); - OPENVINO_SUPPRESS_DEPRECATED_START - size_t get_version() const override { - return 1; - } - OPENVINO_SUPPRESS_DEPRECATED_END - void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; diff --git 
a/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp b/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp index 295f75ccaeb7fc..74d73a777c9859 100644 --- a/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp +++ b/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp @@ -234,10 +234,10 @@ TEST(ConvertFunctionToCNNNetworkTests, UnsupportedDynamicOps) { } catch (InferenceEngine::Exception& e) { EXPECT_THAT(e.what(), testing::HasSubstr(std::string("Unsupported dynamic ops: \n" - "v0::Parameter param () -> (f32[...])\n" - "v0::Relu relu (param[0]:f32[...]) -> (f32[...])\n" - "v3::NonZero non_zero (relu[0]:f32[...]) -> (i64[?,?])\n" - "v0::Result result (non_zero[0]:i64[?,?]) -> (i64[?,?])"))); + "vopset1::Parameter param () -> (f32[...])\n" + "vopset1::Relu relu (param[0]:f32[...]) -> (f32[...])\n" + "vopset3::NonZero non_zero (relu[0]:f32[...]) -> (i64[?,?])\n" + "vopset1::Result result (non_zero[0]:i64[?,?]) -> (i64[?,?])"))); } } diff --git a/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp b/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp index 809af60bc6017f..c4c1fd18f74304 100644 --- a/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp +++ b/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp @@ -20,11 +20,11 @@ using namespace ov::intel_gna::pass; using namespace ov::intel_gna::ngraph_util; using namespace ov::opset9; -NGRAPH_RTTI_DEFINITION(InsertCopyBeforeAssignLayer, "InsertCopyBeforeAssignLayer", 0); -NGRAPH_RTTI_DEFINITION(InsertCopyBeforeConcatLayer, "InsertCopyBeforeConcatLayer", 0); -NGRAPH_RTTI_DEFINITION(HandleMultiConnectedLayerToConcatAndMemory, "HandleMultiConnectedLayerToConcatAndMemory", 0); -NGRAPH_RTTI_DEFINITION(MatchNonComputationalLayers, "MatchNonComputationalLayers", 0); -NGRAPH_RTTI_DEFINITION(HandleNonFunctionalSubgraphs, "HandleNonFunctionalSubgraphs", 0); +NGRAPH_RTTI_DEFINITION(InsertCopyBeforeAssignLayer, "InsertCopyBeforeAssignLayer"); +NGRAPH_RTTI_DEFINITION(InsertCopyBeforeConcatLayer, "InsertCopyBeforeConcatLayer"); +NGRAPH_RTTI_DEFINITION(HandleMultiConnectedLayerToConcatAndMemory, "HandleMultiConnectedLayerToConcatAndMemory"); +NGRAPH_RTTI_DEFINITION(MatchNonComputationalLayers, "MatchNonComputationalLayers"); +NGRAPH_RTTI_DEFINITION(HandleNonFunctionalSubgraphs, "HandleNonFunctionalSubgraphs"); namespace { void insert_copy_layer_between(std::shared_ptr input_op, diff --git a/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp b/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp index 100e0c00ba5aed..d2bcfa13c7202f 100644 --- a/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp +++ b/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp @@ -25,8 +25,8 @@ using namespace ov::intel_gna; using namespace ov::intel_gna::pass; using namespace ov::intel_gna::common; -NGRAPH_RTTI_DEFINITION(PWLApproximation, "PWLApproximation", 0); -NGRAPH_RTTI_DEFINITION(PWLApproximationWithFq, "PWLApproximationWithFq", 0); +NGRAPH_RTTI_DEFINITION(PWLApproximation, "PWLApproximation"); +NGRAPH_RTTI_DEFINITION(PWLApproximationWithFq, "PWLApproximationWithFq"); template double get_break_bound() { diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp index cffe8a8c7b1622..6e75d68d012b7f 100644 --- a/src/plugins/intel_gpu/src/plugin/program.cpp +++ b/src/plugins/intel_gpu/src/plugin/program.cpp @@ -418,7 +418,7 @@ bool 
Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateSingleLayerPrimitive"); - GPU_DEBUG_LOG << "Process " << "op::v" << op->get_type_info().version << "::" << op->get_type_name() << " operation " + GPU_DEBUG_LOG << "Process " << "op::v" << op->get_type_info().version_id << "::" << op->get_type_name() << " operation " << "(friendly_name=" << op->get_friendly_name() << ")" << std::endl; bool is_created = false; @@ -442,7 +442,7 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::s if (!is_created) { IE_THROW() << "Operation: " << op->get_friendly_name() << " of type " << op->get_type_name() - << "(op::v" << op->get_type_info().version << ") is not supported"; + << "(op::v" << op->get_type_info().version_id << ") is not supported"; } } @@ -560,7 +560,7 @@ void validate_inputs_count(const std::shared_ptr& op, std::vector< IE_THROW() << "Invalid inputs count (" << op->get_input_size() << ") in " << op->get_friendly_name() << " (" << op->get_type_name() - << " op::v" << op->get_type_info().version << ")"; + << " op::v" << op->get_type_info().version_id << ")"; } } // namespace intel_gpu diff --git a/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp b/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp index 54b2c4abdc0dd5..00636bca0d8182 100644 --- a/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp +++ b/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp @@ -34,11 +34,9 @@ bool is_type_relaxed(const std::string& type) { } bool compare_type_info(const ngraph::DiscreteTypeInfo& info1, const ngraph::DiscreteTypeInfo& info2) { - OPENVINO_SUPPRESS_DEPRECATED_START - if (!is_type_relaxed(info1.name) && !is_type_relaxed(info2.name) && (info1.version != info2.version)) { + if (!is_type_relaxed(info1.name) && !is_type_relaxed(info2.name) && (std::strcmp(info1.version_id, info2.version_id) != 0)) { return false; } - OPENVINO_SUPPRESS_DEPRECATED_END const std::string info1Name = is_type_relaxed(info1.name) && (info1.parent != nullptr) ? info1.parent->name : info1.name; @@ -89,9 +87,7 @@ bool less_by_parent_name(const std::shared_ptr& l, } std::string typeInfoToStr(const ngraph::Node::type_info_t& typeInfo) { - OPENVINO_SUPPRESS_DEPRECATED_START - return std::string(typeInfo.name) + "/" + to_str(typeInfo.version); - OPENVINO_SUPPRESS_DEPRECATED_END + return std::string(typeInfo.name) + "/" + std::string(typeInfo.version_id); } std::string tensor_names(const ngraph::descriptor::Tensor& t) { diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp index c6a7caefeb5934..b6cd1dc3c15c47 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp @@ -274,9 +274,7 @@ std::vector>> getCons namespace { std::string toString(const NodeTypeInfo& typeInfo) { - OPENVINO_SUPPRESS_DEPRECATED_START - return std::string(typeInfo.name) + " ver. " + std::to_string(typeInfo.version); - OPENVINO_SUPPRESS_DEPRECATED_END + return std::string(typeInfo.name) + " ver. 
" + std::string(typeInfo.version_id); } void CompareShapes(const PartialShape& actual, const PartialShape& expected) { @@ -337,9 +335,7 @@ std::shared_ptr getNodeSharedPtr(const ngraph::NodeTypeInfo &type_ ngraphNode->validate_and_infer_types(); return ngraphNode; } - OPENVINO_SUPPRESS_DEPRECATED_START - NGRAPH_UNREACHABLE("supported opsets does not contain op with name: ", type_info.name, " version: ", type_info.version); - OPENVINO_SUPPRESS_DEPRECATED_END + NGRAPH_UNREACHABLE("supported opsets does not contain op with name: ", type_info.name, " version: ", type_info.version_id); } bool is_tensor_iterator_exist(const std::shared_ptr & func) { From 4561aa7109b12a03664da9f580a2d5f9daf8c8af Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Wed, 22 Mar 2023 16:12:07 +0100 Subject: [PATCH 039/296] [PyOV] OVDict class - new return value from inference (#16370) --- src/bindings/python/requirements.txt | 1 + src/bindings/python/requirements_test.txt | 1 + .../python/src/openvino/runtime/ie_api.py | 33 ++- .../runtime/utils/data_helpers/__init__.py | 1 + .../runtime/utils/data_helpers/wrappers.py | 118 ++++++++- .../python/src/pyopenvino/core/common.cpp | 177 ++++++------- .../python/src/pyopenvino/core/common.hpp | 16 +- .../src/pyopenvino/core/compiled_model.cpp | 4 - .../python/src/pyopenvino/core/containers.cpp | 23 -- .../python/src/pyopenvino/core/containers.hpp | 23 -- .../src/pyopenvino/core/infer_request.cpp | 12 +- .../python/src/pyopenvino/pyopenvino.cpp | 4 - .../python/tests/test_runtime/test_ovdict.py | 249 ++++++++++++++++++ 13 files changed, 489 insertions(+), 173 deletions(-) delete mode 100644 src/bindings/python/src/pyopenvino/core/containers.cpp delete mode 100644 src/bindings/python/src/pyopenvino/core/containers.hpp create mode 100644 src/bindings/python/tests/test_runtime/test_ovdict.py diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index e83f59eb8b3ae7..968d95b8760bed 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1 +1,2 @@ numpy>=1.16.6 +singledispatchmethod; python_version<'3.8' diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index 530a28b3bf1e16..2bd82fb628bc26 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -40,3 +40,4 @@ types-pkg_resources wheel>=0.38.1 protobuf~=3.18.1 numpy>=1.16.6,<=1.23.4 +singledispatchmethod; python_version<'3.8' diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/runtime/ie_api.py index 7bab65a0382113..90099609a1a313 100644 --- a/src/bindings/python/src/openvino/runtime/ie_api.py +++ b/src/bindings/python/src/openvino/runtime/ie_api.py @@ -2,7 +2,6 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from functools import singledispatch from typing import Any, Iterable, Union, Dict, Optional from pathlib import Path @@ -16,6 +15,7 @@ from openvino._pyopenvino import Tensor from openvino.runtime.utils.data_helpers import ( + OVDict, _InferRequestWrapper, _data_dispatch, tensor_from_file, @@ -25,7 +25,7 @@ class InferRequest(_InferRequestWrapper): """InferRequest class represents infer request which can be run in asynchronous or synchronous manners.""" - def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict: + def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict: """Infers specified input(s) in synchronous mode. 
Blocks all methods of InferRequest while request is running. @@ -68,14 +68,14 @@ def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict: Default value: False :type shared_memory: bool, optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray] + :return: Dictionary of results from output tensors with port/int/str keys. + :rtype: OVDict """ - return super().infer(_data_dispatch( + return OVDict(super().infer(_data_dispatch( self, inputs, is_shared=shared_memory, - )) + ))) def start_async( self, @@ -138,6 +138,15 @@ def start_async( userdata, ) + @property + def results(self) -> OVDict: + """Gets all outputs tensors of this InferRequest. + + :return: Dictionary of results from output tensors with ports as keys. + :rtype: Dict[openvino.runtime.ConstOutput, numpy.array] + """ + return OVDict(super().results) + class CompiledModel(CompiledModelBase): """CompiledModel class. @@ -161,7 +170,7 @@ def create_infer_request(self) -> InferRequest: """ return InferRequest(super().create_infer_request()) - def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> dict: + def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> OVDict: """Infers specified input(s) in synchronous mode. Blocks all methods of CompiledModel while request is running. @@ -187,8 +196,8 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] :param inputs: Data to be set on input tensors. :type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.array] + :return: Dictionary of results from output tensors with port/int/str keys. + :rtype: OVDict """ # It returns wrapped python InferReqeust and then call upon # overloaded functions of InferRequest class @@ -196,7 +205,7 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] def __call__(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None, - shared_memory: bool = True) -> dict: + shared_memory: bool = True) -> OVDict: """Callable infer wrapper for CompiledModel. Infers specified input(s) in synchronous mode. @@ -248,8 +257,8 @@ def __call__(self, Default value: True :type shared_memory: bool, optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray] + :return: Dictionary of results from output tensors with port/int/str as keys. 
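In practice the three documented key types can be mixed freely on a single result object. A short end-to-end sketch, assuming a CPU device is available (the tensor name and shapes are illustrative):

import numpy as np
import openvino.runtime.opset10 as ops
from openvino.runtime import Core, Model

param = ops.parameter([1, 20], np.float32, name="data")
model = Model(ops.abs(param), [param])
model.output(0).tensor.names = {"output_0"}

compiled = Core().compile_model(model, "CPU")
result = compiled(np.zeros([1, 20], dtype=np.float32))

# One OVDict, three equivalent ways to address the same tensor:
assert np.array_equal(result[compiled.output(0)], result[0])
assert np.array_equal(result[0], result["output_0"])

# Escape hatches back to plain containers:
plain = result.to_dict()    # {ConstOutput: np.ndarray}, no int/str addressing
values = result.to_tuple()  # (np.ndarray, ...)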
+ :rtype: OVDict """ if self._infer_request is None: self._infer_request = self.create_infer_request() diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py index e49265ccca987f..829a77af96a04c 100644 --- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py +++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py @@ -5,3 +5,4 @@ from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper +from openvino.runtime.utils.data_helpers.wrappers import OVDict diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py index 24b09d40de9555..e2849b8d5e01bd 100644 --- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py +++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py @@ -4,7 +4,17 @@ import numpy as np -from openvino._pyopenvino import Tensor +# TODO: remove this WA and refactor OVDict when Python 3.8 +# becomes the minimal supported version. +try: + from functools import singledispatchmethod +except ImportError: + from singledispatchmethod import singledispatchmethod # type: ignore[no-redef] + +from collections.abc import Mapping +from typing import Union, Dict, List, Iterator, KeysView, ItemsView, ValuesView + +from openvino._pyopenvino import Tensor, ConstOutput +from openvino._pyopenvino import InferRequest as InferRequestBase @@ -20,3 +30,109 @@ def __init__(self, other: InferRequestBase) -> None: # Private memeber to store newly created shared memory data self._inputs_data = None super().__init__(other) + + +class OVDict(Mapping): + """Custom OpenVINO dictionary with inference results. + + This class is a dict-like object. It provides the possibility to + address data tensors with three key types: + + * `openvino.runtime.ConstOutput` - port of the output + * `int` - index of the output + * `str` - names of the output + + This class follows the `frozenset`/`tuple` concept of immutability. + It is prohibited to assign new items or edit them. + + To revert to the previous behavior, use the `to_dict` method, which + returns a shallow copy of the underlying dictionary. + Note: It removes the addressing feature! The new dictionary keeps + only `ConstOutput` keys. + + If a tuple return value is needed, use the `to_tuple` method, which + converts the values to a tuple. + + :Example: + + .. code-block:: python + + # Reverts to the previous behavior of the native dict + result = request.infer(inputs).to_dict() + # or alternatively: + result = dict(request.infer(inputs)) + + ..
code-block:: python + + # To dispatch outputs of multi-output inference: + out1, out2, out3, _ = request.infer(inputs).values() + # or alternatively: + out1, out2, out3, _ = request.infer(inputs).to_tuple() + """ + def __init__(self, _dict: Dict[ConstOutput, np.ndarray]) -> None: + self._dict = _dict + + def __iter__(self) -> Iterator: + return self._dict.__iter__() + + def __len__(self) -> int: + return len(self._dict) + + def __repr__(self) -> str: + return self._dict.__repr__() + + def __get_key(self, index: int) -> ConstOutput: + return list(self._dict.keys())[index] + + @singledispatchmethod + def __getitem_impl(self, key: Union[ConstOutput, int, str]) -> np.ndarray: + raise TypeError("Unknown key type!") + + @__getitem_impl.register + def _(self, key: ConstOutput) -> np.ndarray: + return self._dict[key] + + @__getitem_impl.register + def _(self, key: int) -> np.ndarray: + try: + return self._dict[self.__get_key(key)] + except IndexError: + raise KeyError(key) + + @__getitem_impl.register + def _(self, key: str) -> np.ndarray: + try: + return self._dict[self.__get_key(self.names().index(key))] + except ValueError: + raise KeyError(key) + + def __getitem__(self, key: Union[ConstOutput, int, str]) -> np.ndarray: + return self.__getitem_impl(key) + + def keys(self) -> KeysView[ConstOutput]: + return self._dict.keys() + + def values(self) -> ValuesView[np.ndarray]: + return self._dict.values() + + def items(self) -> ItemsView[ConstOutput, np.ndarray]: + return self._dict.items() + + def names(self) -> List[str]: + """Return the name of every output key. + + Throws RuntimeError if any of the ConstOutput keys has no name. + """ + return [key.get_any_name() for key in self._dict.keys()] + + def to_dict(self) -> Dict[ConstOutput, np.ndarray]: + """Return the underlying native dictionary. + + The function performs a shallow copy, thus any modifications to + returned values may affect this class as well.
+ """ + return self._dict + + def to_tuple(self) -> tuple: + """Convert values of this dictionary to a tuple.""" + return tuple(self._dict.values()) diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 2ad7e395a92895..ef5313cec0185d 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -53,6 +53,27 @@ const std::map& dtype_to_ov_type() { return dtype_to_ov_type_mapping; } +namespace containers { +const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { + TensorIndexMap result_map; + for (auto&& input : inputs) { + int idx; + if (py::isinstance(input.first)) { + idx = input.first.cast(); + } else { + throw py::type_error("incompatible function arguments!"); + } + if (py::isinstance(input.second)) { + auto tensor = Common::cast_to_tensor(input.second); + result_map[idx] = tensor; + } else { + throw ov::Exception("Unable to cast tensor " + std::to_string(idx) + "!"); + } + } + return result_map; +} +}; // namespace containers + namespace array_helpers { bool is_contiguous(const py::array& array) { @@ -110,6 +131,67 @@ py::array as_contiguous(py::array& array, ov::element::Type type) { } } +py::array array_from_tensor(ov::Tensor&& t) { + switch (t.get_element_type()) { + case ov::element::Type_t::f32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::f64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::bf16: { + return py::array(py::dtype("float16"), t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::f16: { + return py::array(py::dtype("float16"), t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i8: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i16: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u8: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u16: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::boolean: { + return py::array_t(t.get_shape(), t.data()); + break; + } + default: { + throw ov::Exception("Numpy array cannot be created from given OV Tensor!"); + break; + } + } +} + }; // namespace array_helpers template <> @@ -226,38 +308,6 @@ const ov::Tensor& cast_to_tensor(const py::handle& tensor) { return tensor.cast(); } -const Containers::TensorNameMap cast_to_tensor_name_map(const py::dict& inputs) { - Containers::TensorNameMap result_map; - for (auto&& input : inputs) { - std::string name; - if (py::isinstance(input.first)) { - name = input.first.cast(); - } else { - throw py::type_error("incompatible function arguments!"); - } - OPENVINO_ASSERT(py::isinstance(input.second), "Unable to cast tensor ", name, "!"); - auto tensor = Common::cast_to_tensor(input.second); - result_map[name] = tensor; - } - return result_map; -} - -const Containers::TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { - Containers::TensorIndexMap result_map; - for (auto&& input : inputs) { - 
int idx; - if (py::isinstance(input.first)) { - idx = input.first.cast(); - } else { - throw py::type_error("incompatible function arguments!"); - } - OPENVINO_ASSERT(py::isinstance(input.second), "Unable to cast tensor ", idx, "!"); - auto tensor = Common::cast_to_tensor(input.second); - result_map[idx] = tensor; - } - return result_map; -} - void set_request_tensors(ov::InferRequest& request, const py::dict& inputs) { if (!inputs.empty()) { for (auto&& input : inputs) { @@ -293,67 +343,10 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) { } } -py::dict outputs_to_dict(const std::vector>& outputs, ov::InferRequest& request) { +py::dict outputs_to_dict(InferRequestWrapper& request) { py::dict res; - for (const auto& out : outputs) { - ov::Tensor t{request.get_tensor(out)}; - switch (t.get_element_type()) { - case ov::element::Type_t::i8: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i16: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u8: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u16: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::bf16: { - res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f16: { - res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::boolean: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - default: { - break; - } - } + for (const auto& out : request.m_outputs) { + res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out)); } return res; } diff --git a/src/bindings/python/src/pyopenvino/core/common.hpp b/src/bindings/python/src/pyopenvino/core/common.hpp index 910d9e55e966ed..de033c3ddf383c 100644 --- a/src/bindings/python/src/pyopenvino/core/common.hpp +++ b/src/bindings/python/src/pyopenvino/core/common.hpp @@ -20,14 +20,20 @@ #include "openvino/runtime/infer_request.hpp" #include "openvino/runtime/tensor.hpp" #include "openvino/pass/serialize.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/graph/any.hpp" #include "pyopenvino/graph/ops/constant.hpp" +#include "pyopenvino/core/infer_request.hpp" namespace py = pybind11; namespace Common { +namespace containers { + using TensorIndexMap = std::map; + + const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs); +}; // namespace containers + namespace values { // Minimum amount of bits for common numpy types. Used to perform checks against OV types. 
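With the name-keyed TensorNameMap conversion removed above, cast_to_tensor_index_map is the only dict-to-map conversion left in common.cpp; it backs the index-keyed set_input_tensors/set_output_tensors bindings shown further below. A sketch of the call shape that still routes through it, assuming an existing InferRequest named request and matching tensor shapes (both illustrative):

import numpy as np
from openvino.runtime import Tensor

# Keys must be plain Python ints; any other key type raises
# py::type_error("incompatible function arguments!") inside
# cast_to_tensor_index_map.
request.set_input_tensors({0: Tensor(np.zeros([1, 20], dtype=np.float32))})
request.set_output_tensors({0: Tensor(np.zeros([1, 20], dtype=np.float32))})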
@@ -52,6 +58,8 @@ std::vector get_strides(const py::array& array); py::array as_contiguous(py::array& array, ov::element::Type type); +py::array array_from_tensor(ov::Tensor&& t); + }; // namespace array_helpers template @@ -80,15 +88,11 @@ ov::PartialShape partial_shape_from_list(const py::list& shape); const ov::Tensor& cast_to_tensor(const py::handle& tensor); -const Containers::TensorNameMap cast_to_tensor_name_map(const py::dict& inputs); - -const Containers::TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs); - void set_request_tensors(ov::InferRequest& request, const py::dict& inputs); uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual); -py::dict outputs_to_dict(const std::vector>& outputs, ov::InferRequest& request); +py::dict outputs_to_dict(InferRequestWrapper& request); ov::pass::Serialize::Version convert_to_version(const std::string& version); diff --git a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp index 9cd0202f32f415..7cca9af077e15a 100644 --- a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp +++ b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp @@ -9,13 +9,9 @@ #include "common.hpp" #include "pyopenvino/core/compiled_model.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/core/infer_request.hpp" #include "pyopenvino/utils/utils.hpp" -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); -PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - namespace py = pybind11; void regclass_CompiledModel(py::module m) { diff --git a/src/bindings/python/src/pyopenvino/core/containers.cpp b/src/bindings/python/src/pyopenvino/core/containers.cpp deleted file mode 100644 index 8ee414e007a14f..00000000000000 --- a/src/bindings/python/src/pyopenvino/core/containers.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "pyopenvino/core/containers.hpp" - -#include - -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); -PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - -namespace py = pybind11; - -namespace Containers { - -void regclass_TensorIndexMap(py::module m) { - py::bind_map(m, "TensorIndexMap"); -} - -void regclass_TensorNameMap(py::module m) { - py::bind_map(m, "TensorNameMap"); -} -} // namespace Containers diff --git a/src/bindings/python/src/pyopenvino/core/containers.hpp b/src/bindings/python/src/pyopenvino/core/containers.hpp deleted file mode 100644 index becf2f717847de..00000000000000 --- a/src/bindings/python/src/pyopenvino/core/containers.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -#include - -#include - -namespace py = pybind11; - -namespace Containers { - using TensorIndexMap = std::map; - using TensorNameMap = std::map; - - void regclass_TensorIndexMap(py::module m); - void regclass_TensorNameMap(py::module m); -} diff --git a/src/bindings/python/src/pyopenvino/core/infer_request.cpp b/src/bindings/python/src/pyopenvino/core/infer_request.cpp index 585441569f9e77..8be02e8adb86bb 100644 --- a/src/bindings/python/src/pyopenvino/core/infer_request.cpp +++ b/src/bindings/python/src/pyopenvino/core/infer_request.cpp @@ -11,12 +11,8 @@ #include #include "pyopenvino/core/common.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/utils/utils.hpp" -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); 
-PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - namespace py = pybind11; inline py::dict run_sync_infer(InferRequestWrapper& self) { @@ -26,7 +22,7 @@ inline py::dict run_sync_infer(InferRequestWrapper& self) { self.m_request.infer(); *self.m_end_time = Time::now(); } - return Common::outputs_to_dict(self.m_outputs, self.m_request); + return Common::outputs_to_dict(self); } void regclass_InferRequest(py::module m) { @@ -103,7 +99,7 @@ void regclass_InferRequest(py::module m) { cls.def( "set_output_tensors", [](InferRequestWrapper& self, const py::dict& outputs) { - auto outputs_map = Common::cast_to_tensor_index_map(outputs); + auto outputs_map = Common::containers::cast_to_tensor_index_map(outputs); for (auto&& output : outputs_map) { self.m_request.set_output_tensor(output.first, output.second); } @@ -120,7 +116,7 @@ void regclass_InferRequest(py::module m) { cls.def( "set_input_tensors", [](InferRequestWrapper& self, const py::dict& inputs) { - auto inputs_map = Common::cast_to_tensor_index_map(inputs); + auto inputs_map = Common::containers::cast_to_tensor_index_map(inputs); for (auto&& input : inputs_map) { self.m_request.set_input_tensor(input.first, input.second); } @@ -719,7 +715,7 @@ void regclass_InferRequest(py::module m) { cls.def_property_readonly( "results", [](InferRequestWrapper& self) { - return Common::outputs_to_dict(self.m_outputs, self.m_request); + return Common::outputs_to_dict(self); }, R"( Gets all outputs tensors of this InferRequest. diff --git a/src/bindings/python/src/pyopenvino/pyopenvino.cpp b/src/bindings/python/src/pyopenvino/pyopenvino.cpp index a229f9eaa7d72e..0f2cdf38278010 100644 --- a/src/bindings/python/src/pyopenvino/pyopenvino.cpp +++ b/src/bindings/python/src/pyopenvino/pyopenvino.cpp @@ -24,7 +24,6 @@ #endif #include "pyopenvino/core/async_infer_queue.hpp" #include "pyopenvino/core/compiled_model.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/core/core.hpp" #include "pyopenvino/core/extension.hpp" #include "pyopenvino/core/infer_request.hpp" @@ -210,9 +209,6 @@ PYBIND11_MODULE(_pyopenvino, m) { regclass_Core(m); regclass_Tensor(m); - // Registering specific types of containers - Containers::regclass_TensorIndexMap(m); - Containers::regclass_TensorNameMap(m); regclass_CompiledModel(m); regclass_InferRequest(m); diff --git a/src/bindings/python/tests/test_runtime/test_ovdict.py b/src/bindings/python/tests/test_runtime/test_ovdict.py new file mode 100644 index 00000000000000..e8c76a6d8d3bf7 --- /dev/null +++ b/src/bindings/python/tests/test_runtime/test_ovdict.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Mapping +import numpy as np +import pytest + +import openvino.runtime.opset10 as ops +from openvino.runtime import Core, ConstOutput, CompiledModel, InferRequest, Model +from openvino.runtime.ie_api import OVDict + + +def _get_ovdict( + device, + input_shape=None, + data_type=np.float32, + input_names=None, + output_names=None, + multi_output=False, + direct_infer=False, + split_num=5, +): + # Create model + # If model is multi-output (multi_output=True), input_shape must match + # requirements of split operation. 
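# For illustration (not part of the committed test): ops.split(param, 1, split_num)
# requires the axis-1 length of input_shape to be divisible by split_num; the
# default input_shape=[1, 20] with split_num=5 yields five [1, 4] outputs.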
+ # TODO OpenSource: refactor it to be more generic + if input_shape is None: + input_shape = [1, 20] + if input_names is None: + input_names = ["data_0"] + if output_names is None: + output_names = ["output_0"] + if multi_output: + assert isinstance(output_names, (list, tuple)) + assert len(output_names) > 1 + assert len(output_names) == split_num + param = ops.parameter(input_shape, data_type, name=input_names[0]) + model = Model( + ops.split(param, 1, split_num) if multi_output else ops.abs(param), [param], + ) + # Manually name outputs + for i in range(len(output_names)): + model.output(i).tensor.names = {output_names[i]} + # Compile model + core = Core() + compiled_model = core.compile_model(model, device) + # Create test data + input_data = np.random.random(input_shape).astype(data_type) + # Two ways of infering + if direct_infer: + result = compiled_model(input_data) + assert result is not None + return result, compiled_model + + request = compiled_model.create_infer_request() + result = request.infer(input_data) + assert result is not None + return result, request + + +def _check_keys(keys, outs): + outs_iter = iter(outs) + for key in keys: + assert isinstance(key, ConstOutput) + assert key == next(outs_iter) + return True + + +def _check_values(result): + for value in result.values(): + assert isinstance(value, np.ndarray) + return True + + +def _check_items(result, outs, output_names): + i = 0 + for key, value in result.items(): + assert isinstance(key, ConstOutput) + assert isinstance(value, np.ndarray) + # Check values + assert np.equal(result[outs[i]], result[key]).all() + assert np.equal(result[outs[i]], result[i]).all() + assert np.equal(result[outs[i]], result[output_names[i]]).all() + i += 1 + return True + + +def _check_dict(result, obj, output_names=None): + if output_names is None: + output_names = ["output_0"] + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + assert len(outs) == len(result) + assert len(outs) == len(output_names) + # Check for __iter__ + assert _check_keys(result, outs) + # Check for keys function + assert _check_keys(result.keys(), outs) + assert _check_values(result) + assert _check_items(result, outs, output_names) + assert result.names() == output_names + + return True + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_assign(device, is_direct): + result, _ = _get_ovdict(device, multi_output=False, direct_infer=is_direct) + + with pytest.raises(TypeError) as e: + result["some_name"] = 99 + assert "'OVDict' object does not support item assignment" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_basic(device, is_direct): + result, obj = _get_ovdict(device, multi_output=False, direct_infer=is_direct) + + assert isinstance(result, OVDict) + if isinstance(obj, (InferRequest, CompiledModel)): + assert _check_dict(result, obj) + else: + raise TypeError("Unknown `obj` type!") + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_noname(device, is_direct): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + output_names=[], + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert isinstance(result[outs[0]], np.ndarray) + assert isinstance(result[0], np.ndarray) + + with pytest.raises(RuntimeError) as e0: + _ = result["some_name"] + assert "Attempt to get a name for a Tensor without names" in 
str(e0.value) + + with pytest.raises(RuntimeError) as e1: + _ = result.names() + assert "Attempt to get a name for a Tensor without names" in str(e1.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_wrongname(device, is_direct): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + output_names=["output_21"], + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert isinstance(result[outs[0]], np.ndarray) + assert isinstance(result[0], np.ndarray) + + with pytest.raises(KeyError) as e: + _ = result["output_37"] + assert "output_37" in str(e.value) + + with pytest.raises(KeyError) as e: + _ = result[6] + assert "6" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +@pytest.mark.parametrize("use_function", [True, False]) +def test_ovdict_single_output_dict(device, is_direct, use_function): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + native_dict = result.to_dict() if use_function else dict(result) + + assert issubclass(type(native_dict), dict) + assert not isinstance(native_dict, OVDict) + assert isinstance(native_dict[outs[0]], np.ndarray) + + with pytest.raises(KeyError) as e: + _ = native_dict["output_0"] + assert "output_0" in str(e.value) + + with pytest.raises(KeyError) as e: + _ = native_dict[0] + assert "0" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_multi_output_basic(device, is_direct): + output_names = ["output_0", "output_1", "output_2", "output_3", "output_4"] + result, obj = _get_ovdict( + device, + multi_output=True, + direct_infer=is_direct, + output_names=output_names, + ) + + assert isinstance(result, OVDict) + if isinstance(obj, (InferRequest, CompiledModel)): + assert _check_dict(result, obj, output_names) + else: + raise TypeError("Unknown `obj` type!") + + +@pytest.mark.parametrize("is_direct", [True, False]) +@pytest.mark.parametrize("use_function", [True, False]) +def test_ovdict_multi_output_tuple0(device, is_direct, use_function): + output_names = ["output_0", "output_1"] + result, obj = _get_ovdict( + device, + input_shape=(1, 10), + multi_output=True, + direct_infer=is_direct, + split_num=2, + output_names=output_names, + ) + + out0, out1 = None, None + if use_function: + assert isinstance(result.to_tuple(), tuple) + out0, out1 = result.to_tuple() + else: + out0, out1 = result.values() + + assert out0 is not None + assert out1 is not None + assert isinstance(out0, np.ndarray) + assert isinstance(out1, np.ndarray) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert np.equal(result[outs[0]], out0).all() + assert np.equal(result[outs[1]], out1).all() From c23a1170ba5a494be4993c475273e01d43c02709 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 22 Mar 2023 19:51:07 +0400 Subject: [PATCH 040/296] Remove plugins xml (#16470) * Update core_impl.cpp Add first implementation of register_compile_time_plugins (needs to depend on the actual CMake configuration as a next step). 
* Update core.cpp Check for missing plugins.xml * Update core_impl.cpp Avoid exception for missing plugins.xml * Update core_impl.hpp Add register_compile_time_plugins function definition * Plugin loading based on CMake configuration * Remove debug output command * Unify static/dynamic plugin loading * Add CMake option for plugins.xml that defaults to off * Move GENERATE_PLUGINS_XML option to features.cmake * Add missing brace * Remove unnecessary #ifdef check * Prepare to resolve conflicts * Fix compile error * Activate generation of plugins.xml in OpenVINODeveloperPackageConfig.cmake * Fix CMake installation * Plugin loading logic implemented in ie_core.cpp as well * Fix format * Small fixes * Fixed code style * Skip if xml file wasn't found * Added function to find compiled plugins * Generalize plugins hpp * Use new API * Fixed old core * Fixed static build --------- Co-authored-by: CSBVision --- .../plugins/create_plugins_hpp.cmake | 38 +++++----- cmake/developer_package/plugins/plugins.cmake | 53 +++++++------- .../developer_package/plugins/plugins.hpp.in | 17 ++++- cmake/features.cmake | 2 + .../OpenVINODeveloperPackageConfig.cmake.in | 3 + src/cmake/openvino.cmake | 2 +- src/common/util/CMakeLists.txt | 7 ++ .../util/include/openvino/util/file_util.hpp | 8 +++ src/common/util/src/file_util.cpp | 32 +++++++++ src/inference/src/core.cpp | 25 +++---- src/inference/src/dev/core_impl.cpp | 34 +++++++++ src/inference/src/dev/core_impl.hpp | 70 ++++++------------- src/inference/src/ie_core.cpp | 18 ++--- 13 files changed, 191 insertions(+), 118 deletions(-) diff --git a/cmake/developer_package/plugins/create_plugins_hpp.cmake b/cmake/developer_package/plugins/create_plugins_hpp.cmake index cddcad738470ca..10adcac6c28f1f 100644 --- a/cmake/developer_package/plugins/create_plugins_hpp.cmake +++ b/cmake/developer_package/plugins/create_plugins_hpp.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -foreach(var IE_DEVICE_MAPPING IE_PLUGINS_HPP_HEADER IE_PLUGINS_HPP_HEADER_IN) +foreach(var IE_DEVICE_MAPPING OV_DYNAMIC IE_PLUGINS_HPP_HEADER IE_PLUGINS_HPP_HEADER_IN) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} is required, but not defined") endif() @@ -19,20 +19,6 @@ foreach(dev_map IN LISTS IE_DEVICE_MAPPING) list(GET dev_map 0 mapped_dev_name) list(GET dev_map 1 actual_dev_name) - # common - set(_IE_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") - set(_IE_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") - - # declarations - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} -IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_PLUGIN_FUNC});") - if(${actual_dev_name}_AS_EXTENSION) - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} -IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") - else() - set(_IE_CREATE_EXTENSION_FUNC "nullptr") - endif() - # definitions set(dev_config "{") if(${mapped_dev_name}_CONFIG) @@ -48,8 +34,28 @@ IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") endif() set(dev_config "${dev_config}}") - set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} + + if(NOT OV_DYNAMIC) + # common + set(_IE_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") + set(_IE_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") + + # declarations + set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} + IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_PLUGIN_FUNC});") + if(${actual_dev_name}_AS_EXTENSION) + set(IE_PLUGINS_DECLARATIONS 
"${IE_PLUGINS_DECLARATIONS} + IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") + else() + set(_IE_CREATE_EXTENSION_FUNC "nullptr") + endif() + + set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} { \"${mapped_dev_name}\", Value { ${_IE_CREATE_PLUGIN_FUNC}, ${_IE_CREATE_EXTENSION_FUNC}, ${dev_config} } },") + else() + set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} + { \"${mapped_dev_name}\", Value { \"${actual_dev_name}\", ${dev_config} } },") + endif() endforeach() set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index b4cfe20bd024e5..7f00cc70269861 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -113,7 +113,7 @@ function(ie_add_plugin) if(IE_PLUGIN_PSEUDO_DEVICE) set(plugin_hidden HIDDEN) endif() - ie_cpack_add_component(${install_component} + ie_cpack_add_component(${install_component} DISPLAY_NAME "${IE_PLUGIN_DEVICE_NAME} runtime" DESCRIPTION "${IE_PLUGIN_DEVICE_NAME} runtime" ${plugin_hidden} @@ -227,16 +227,18 @@ macro(ie_register_plugins_dynamic) # Combine all .xml files into plugins.xml - add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD - COMMAND - "${CMAKE_COMMAND}" - -D "CMAKE_SHARED_MODULE_PREFIX=${CMAKE_SHARED_MODULE_PREFIX}" - -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" - -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" - -P "${IEDevScripts_DIR}/plugins/register_plugin_cmake.cmake" - COMMENT - "Registering plugins to plugins.xml config file" - VERBATIM) + if(ENABLE_PLUGINS_XML) + add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD + COMMAND + "${CMAKE_COMMAND}" + -D "CMAKE_SHARED_MODULE_PREFIX=${CMAKE_SHARED_MODULE_PREFIX}" + -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" + -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" + -P "${IEDevScripts_DIR}/plugins/register_plugin_cmake.cmake" + COMMENT + "Registering plugins to plugins.xml config file" + VERBATIM) + endif() endmacro() # @@ -282,10 +284,6 @@ endfunction() # ie_generate_plugins_hpp() # function(ie_generate_plugins_hpp) - if(BUILD_SHARED_LIBS) - return() - endif() - set(device_mapping) set(device_configs) set(as_extension) @@ -296,17 +294,23 @@ function(ie_generate_plugins_hpp) message(FATAL_ERROR "Unexpected error, please, contact developer of this script") endif() - # create device mapping: preudo device => actual device + # create device mapping: pseudo device => actual device list(GET name 0 device_name) - if(${device_name}_PSEUDO_PLUGIN_FOR) - list(APPEND device_mapping "${device_name}:${${device_name}_PSEUDO_PLUGIN_FOR}") + if(BUILD_SHARED_LIBS) + list(GET name 1 library_name) + ie_plugin_get_file_name(${library_name} library_name) + list(APPEND device_mapping "${device_name}:${library_name}") else() - list(APPEND device_mapping "${device_name}:${device_name}") - endif() + if(${device_name}_PSEUDO_PLUGIN_FOR) + list(APPEND device_mapping "${device_name}:${${device_name}_PSEUDO_PLUGIN_FOR}") + else() + list(APPEND device_mapping "${device_name}:${device_name}") + endif() - # register plugin as extension - if(${device_name}_AS_EXTENSION) - list(APPEND as_extension -D "${device_name}_AS_EXTENSION=ON") + # register plugin as extension + if(${device_name}_AS_EXTENSION) + list(APPEND as_extension -D "${device_name}_AS_EXTENSION=ON") + endif() endif() # add default plugin config options @@ -330,6 +334,7 @@ function(ie_generate_plugins_hpp) COMMAND 
"${CMAKE_COMMAND}" -D "IE_DEVICE_MAPPING=${device_mapping}" + -D "OV_DYNAMIC=${BUILD_SHARED_LIBS}" -D "IE_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" -D "IE_PLUGINS_HPP_HEADER=${ie_plugins_hpp}" ${device_configs} @@ -339,7 +344,7 @@ function(ie_generate_plugins_hpp) "${plugins_hpp_in}" "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" COMMENT - "Generate ie_plugins.hpp for static build" + "Generate ie_plugins.hpp for build" VERBATIM) # for some reason dependency on source files does not work diff --git a/cmake/developer_package/plugins/plugins.hpp.in b/cmake/developer_package/plugins/plugins.hpp.in index fa8119756b82e8..d351bcfb76f3d0 100644 --- a/cmake/developer_package/plugins/plugins.hpp.in +++ b/cmake/developer_package/plugins/plugins.hpp.in @@ -4,6 +4,11 @@ #pragma once +#include +#include + +#ifdef OPENVINO_STATIC_LIBRARY + #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" @IE_PLUGINS_DECLARATIONS@ @@ -14,10 +19,20 @@ struct Value { std::map m_default_config; }; +#else + +struct Value { + std::string m_plugin_path; + std::map m_default_config; +}; + +#endif + using Key = std::string; using PluginsStaticRegistry = std::map; -inline const std::map getStaticPluginsRegistry() { + +inline const std::map getCompiledPluginsRegistry() { @IE_PLUGINS_MAP_DEFINITION@ return plugins_hpp; } diff --git a/cmake/features.cmake b/cmake/features.cmake index 17c5ccc1b3c7e5..24dfaef46e89a7 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -94,6 +94,8 @@ ie_option (ENABLE_HETERO "Enables Hetero Device Plugin" ON) ie_option (ENABLE_TEMPLATE "Enable template plugin" ON) +ie_dependent_option (ENABLE_PLUGINS_XML "Generate plugins.xml configuration file or not" OFF "NOT BUILD_SHARED_LIBS" OFF) + ie_dependent_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF "ENABLE_TESTS;ENABLE_GAPI_PREPROCESSING" OFF) ie_dependent_option (ENABLE_DATA "fetch models from testdata repo" ON "ENABLE_FUNCTIONAL_TESTS;NOT ANDROID" OFF) diff --git a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in index 24238be0604c1b..d530ea36d1d9c8 100644 --- a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in +++ b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in @@ -28,6 +28,9 @@ foreach(option IN LISTS ov_options) endforeach() message(" ") +# activate generation of plugins.xml +set(ENABLE_PLUGINS_XML ON) + # for samples in 3rd party projects if(ENABLE_SAMPLES) set_and_check(gflags_DIR "@gflags_BINARY_DIR@") diff --git a/src/cmake/openvino.cmake b/src/cmake/openvino.cmake index 7870e2963e3c59..0a0b9f9d1896a4 100644 --- a/src/cmake/openvino.cmake +++ b/src/cmake/openvino.cmake @@ -131,7 +131,7 @@ ie_cpack_add_component(${OV_CPACK_COMP_CORE_DEV} HIDDEN DEPENDS ${OV_CPACK_COMP_CORE} ${core_dev_components}) -if(BUILD_SHARED_LIBS) +if(ENABLE_PLUGINS_XML) install(FILES $/plugins.xml DESTINATION ${OV_CPACK_PLUGINSDIR} COMPONENT ${OV_CPACK_COMP_CORE}) diff --git a/src/common/util/CMakeLists.txt b/src/common/util/CMakeLists.txt index a589c283390761..160be0259b8b30 100644 --- a/src/common/util/CMakeLists.txt +++ b/src/common/util/CMakeLists.txt @@ -24,6 +24,13 @@ endif() # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj +set(MIXED_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/src/file_util.cpp") + +set_property(SOURCE ${MIXED_SRC} + APPEND PROPERTY INCLUDE_DIRECTORIES + $) + source_group("src" FILES ${LIBRARY_SRC}) source_group("include" FILES ${PUBLIC_HEADERS}) diff --git 
a/src/common/util/include/openvino/util/file_util.hpp b/src/common/util/include/openvino/util/file_util.hpp index 00d8dbe073cd61..ccf8ed4e46c6e0 100644 --- a/src/common/util/include/openvino/util/file_util.hpp +++ b/src/common/util/include/openvino/util/file_util.hpp @@ -260,6 +260,14 @@ inline std::basic_string make_plugin_library_name(const std::basic_string& */ FilePath get_plugin_path(const std::string& plugin); +/** + * @brief Find the plugins which are located together with OV library + * @param plugin - Path (absolute or relative) or name of a plugin. Depending on platform, `plugin` is wrapped with + * shared library suffix and prefix to identify library full name + * @return absolute path or file name with extension (to be found in ENV) + */ +FilePath get_compiled_plugin_path(const std::string& plugin); + /** * @brief Format plugin path (canonicalize, complete to absolute or complete to file name) for further * dynamic loading by OS diff --git a/src/common/util/src/file_util.cpp b/src/common/util/src/file_util.cpp index bcbd3fe2f906c7..f39f2dd3c677d9 100644 --- a/src/common/util/src/file_util.cpp +++ b/src/common/util/src/file_util.cpp @@ -12,6 +12,7 @@ #include #include +#include "openvino/core/version.hpp" #include "openvino/util/common_util.hpp" #ifdef _WIN32 @@ -504,6 +505,37 @@ ov::util::FilePath ov::util::get_plugin_path(const std::string& plugin) { return ov::util::to_file_path(lib_name); } +ov::util::FilePath ov::util::get_compiled_plugin_path(const std::string& plugin) { + const auto ov_library_path = get_ov_lib_path(); + + // plugin can be found either: + + // 1. in openvino-X.Y.Z folder relative to libopenvino.so + std::ostringstream str; + str << "openvino-" << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH; + const auto sub_folder = str.str(); + + std::string abs_file_path = ov::util::path_join({ov_library_path, sub_folder, plugin}); + if (ov::util::file_exists(abs_file_path)) + return ov::util::to_file_path(abs_file_path); + + // 2. in the openvino.so location + abs_file_path = ov::util::path_join({ov_library_path, plugin}); + if (ov::util::file_exists(abs_file_path)) + return ov::util::to_file_path(abs_file_path); + + auto lib_name = plugin; + // For 3rd case - convert to 4th case + if (!ov::util::ends_with(plugin, ov::util::FileTraits::library_ext())) + lib_name = ov::util::make_plugin_library_name({}, plugin); + + // For 4th case + auto lib_path = ov::util::to_file_path(ov::util::get_absolute_file_path(lib_name)); + if (ov::util::file_exists(lib_path)) + return lib_path; + return ov::util::to_file_path(lib_name); +} + ov::util::FilePath ov::util::get_plugin_path(const std::string& plugin, const std::string& xml_path, bool as_abs_only) { // Assume `plugin` (from XML "location" record) contains only: // 1. 
/path/to/libexample.so absolute path diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp index 0a2fba9072b6ff..fef2652b275d17 100644 --- a/src/inference/src/core.cpp +++ b/src/inference/src/core.cpp @@ -9,13 +9,10 @@ #include "dev/converter_utils.hpp" #include "dev/core_impl.hpp" #include "ie_itt.hpp" +#include "ie_plugins.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "so_extension.hpp" -#ifdef OPENVINO_STATIC_LIBRARY -# include "ie_plugins.hpp" -#endif - namespace { std::string resolve_extension_path(const std::string& path) { std::string retvalue; @@ -32,8 +29,6 @@ std::string resolve_extension_path(const std::string& path) { namespace ov { -#ifndef OPENVINO_STATIC_LIBRARY - std::string findPluginXML(const std::string& xmlFile) { std::string xmlConfigFile_ = xmlFile; if (xmlConfigFile_.empty()) { @@ -56,14 +51,10 @@ std::string findPluginXML(const std::string& xmlFile) { xmlConfigFileDefault = FileUtils::makePath(ielibraryDir, ov::util::to_file_path("plugins.xml")); if (FileUtils::fileExist(xmlConfigFileDefault)) return xmlConfigFile_ = ov::util::from_file_path(xmlConfigFileDefault); - - OPENVINO_THROW("Failed to find plugins.xml file"); } return xmlConfigFile_; } -#endif // OPENVINO_STATIC_LIBRARY - #define OV_CORE_CALL_STATEMENT(...) \ try { \ __VA_ARGS__; \ @@ -81,13 +72,13 @@ class Core::Impl : public CoreImpl { Core::Core(const std::string& xml_config_file) { _impl = std::make_shared(); -#ifdef OPENVINO_STATIC_LIBRARY - OV_CORE_CALL_STATEMENT(_impl->register_plugins_in_registry(::getStaticPluginsRegistry());) -#else - OV_CORE_CALL_STATEMENT( - // If XML is default, load default plugins by absolute paths - _impl->register_plugins_in_registry(findPluginXML(xml_config_file), xml_config_file.empty());) -#endif + std::string xmlConfigFile = ov::findPluginXML(xml_config_file); + if (!xmlConfigFile.empty()) + OV_CORE_CALL_STATEMENT( + // If XML is default, load default plugins by absolute paths + _impl->register_plugins_in_registry(xmlConfigFile, xml_config_file.empty());) + // Load plugins from the pre-compiled list + OV_CORE_CALL_STATEMENT(_impl->register_compile_time_plugins();) } std::map Core::get_versions(const std::string& device_name) const { diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index ed39bc67f1f94e..d97a89f8f79411 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -34,6 +34,7 @@ #include "openvino/runtime/remote_context.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/util/common_util.hpp" +#include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "preprocessing/preprocessing.hpp" #include "xml_parse_utils.h" @@ -311,6 +312,39 @@ ov::CoreImpl::CoreImpl(bool _newAPI) : m_new_api(_newAPI) { } } +void ov::CoreImpl::register_compile_time_plugins() { + std::lock_guard lock(get_mutex()); + + const decltype(::getCompiledPluginsRegistry())& plugins = getCompiledPluginsRegistry(); +#ifdef OPENVINO_STATIC_LIBRARY + for (const auto& plugin : plugins) { + const auto& deviceName = plugin.first; + if (deviceName.find('.') != std::string::npos) { + OPENVINO_THROW("Device name must not contain dot '.' 
symbol"); + } + if (pluginRegistry.find(deviceName) == pluginRegistry.end()) { + const auto& value = plugin.second; + ov::AnyMap config = any_copy(value.m_default_config); + PluginDescriptor desc{value.m_create_plugin_func, config, value.m_create_extension_func}; + pluginRegistry[deviceName] = desc; + add_mutex(deviceName); + } + } +#else + for (const auto& plugin : plugins) { + const auto& deviceName = plugin.first; + const auto& pluginPath = ov::util::get_compiled_plugin_path(plugin.second.m_plugin_path); + + if (pluginRegistry.find(deviceName) == pluginRegistry.end() && ov::util::file_exists(pluginPath)) { + ov::AnyMap config = any_copy(plugin.second.m_default_config); + PluginDescriptor desc{pluginPath, config}; + pluginRegistry[deviceName] = desc; + add_mutex(deviceName); + } + } +#endif +} + void ov::CoreImpl::register_plugins_in_registry(const std::string& xml_config_file, const bool& by_abs_path) { std::lock_guard lock(get_mutex()); diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 2277d70b9d0acd..8fe7768dc6c91a 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -15,6 +15,7 @@ #include "ie_cache_manager.hpp" #include "ie_extension.h" #include "ie_icore.hpp" +#include "ie_plugins.hpp" #include "multi-device/multi_device_config.hpp" #include "openvino/core/any.hpp" #include "openvino/core/extension.hpp" @@ -22,10 +23,7 @@ #include "openvino/runtime/common.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/threading/executor_manager.hpp" - -#ifdef OPENVINO_STATIC_LIBRARY -# include "ie_plugins.hpp" -#endif +#include "openvino/util/file_util.hpp" namespace ov { @@ -48,16 +46,13 @@ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, const AnyMap& co * * @param device_name Target device * @param device_name_to_parse Device ID of property - * @return true if ov::device::properties(, ...) is applicable for device identified by 'device_name + * @return true if ov::device::properties(, ...) 
is applicable for device identified by + * 'device_name */ bool is_config_applicable(const std::string& device_name, const std::string& device_name_to_parse); -#ifndef OPENVINO_STATIC_LIBRARY - std::string findPluginXML(const std::string& xmlFile); -#endif - class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_this { private: mutable std::map plugins; @@ -94,8 +89,7 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t // Creating thread-safe copy of config including shared_ptr to ICacheManager // Passing empty or not-existing name will return global cache config - CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, - ov::AnyMap& parsedConfig) const; + CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, ov::AnyMap& parsedConfig) const; private: mutable std::mutex _cacheConfigMutex; @@ -158,16 +152,17 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t const bool m_new_api; ov::SoPtr compile_model_and_cache(const std::shared_ptr& model, - ov::Plugin& plugin, - const ov::AnyMap& parsedConfig, - const ov::RemoteContext& context, - const CacheContent& cacheContent) const; + ov::Plugin& plugin, + const ov::AnyMap& parsedConfig, + const ov::RemoteContext& context, + const CacheContent& cacheContent) const; - static ov::SoPtr load_model_from_cache(const CacheContent& cacheContent, - ov::Plugin& plugin, - const ov::AnyMap& config, - const ov::RemoteContext& context, - std::function()> compile_model_lambda); + static ov::SoPtr load_model_from_cache( + const CacheContent& cacheContent, + ov::Plugin& plugin, + const ov::AnyMap& config, + const ov::RemoteContext& context, + std::function()> compile_model_lambda); bool device_supports_import_export(const ov::Plugin& plugin) const; @@ -177,12 +172,11 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t bool device_supports_cache_dir(const ov::Plugin& plugin) const; ov::SoPtr compile_model_with_preprocess(ov::Plugin& plugin, - const std::shared_ptr& model, - const ov::RemoteContext& context, - const ov::AnyMap& config) const; + const std::shared_ptr& model, + const ov::RemoteContext& context, + const ov::AnyMap& config) const; - ov::AnyMap create_compile_config(const ov::Plugin& plugin, - const ov::AnyMap& origConfig) const; + ov::AnyMap create_compile_config(const ov::Plugin& plugin, const ov::AnyMap& origConfig) const; // Legacy API void AddExtensionUnsafe(const InferenceEngine::IExtensionPtr& extension) const; @@ -218,30 +212,10 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t std::string& deviceName, ov::AnyMap& config) const; -#ifdef OPENVINO_STATIC_LIBRARY - - /** - * @brief Register plugins for devices using statically defined configuration - * @note The function supports UNICODE path - * @param static_registry a statically defined configuration with device / plugin information + /* + * @brief Register plugins according to the build configuration */ - void register_plugins_in_registry(const decltype(::getStaticPluginsRegistry())& static_registry) { - std::lock_guard lock(get_mutex()); - - for (const auto& plugin : static_registry) { - const auto& deviceName = plugin.first; - if (deviceName.find('.') != std::string::npos) { - IE_THROW() << "Device name must not contain dot '.' 
symbol"; - } - const auto& value = plugin.second; - ov::AnyMap config = any_copy(value.m_default_config); - PluginDescriptor desc{value.m_create_plugin_func, config, value.m_create_extension_func}; - pluginRegistry[deviceName] = desc; - add_mutex(deviceName); - } - } - -#endif + void register_compile_time_plugins(); // // ICore public API diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index cc138a0f13d17a..de604f6fab4f21 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -30,6 +30,7 @@ #include "ie_network_reader.hpp" #include "ie_ngraph_utils.hpp" #include "ie_plugin_config.hpp" +#include "ie_plugins.hpp" #include "ie_remote_context.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/ngraph.hpp" @@ -47,10 +48,6 @@ #include "so_extension.hpp" #include "xml_parse_utils.h" -#ifdef OPENVINO_STATIC_LIBRARY -# include "ie_plugins.hpp" -#endif - using namespace InferenceEngine::PluginConfigParams; using namespace InferenceEngine; using namespace std::placeholders; @@ -91,13 +88,12 @@ class Core::Impl : public ov::CoreImpl { Core::Core(const std::string& xmlConfigFile) { _impl = std::make_shared(); -#ifdef OPENVINO_STATIC_LIBRARY - _impl->register_plugins_in_registry(::getStaticPluginsRegistry()); -#else - // If XML is default, load default plugins by absolute paths - auto loadByAbsPath = xmlConfigFile.empty(); - _impl->register_plugins_in_registry(ov::findPluginXML(xmlConfigFile), loadByAbsPath); -#endif + std::string xmlConfigFile_ = ov::findPluginXML(xmlConfigFile); + if (!xmlConfigFile_.empty()) + // If XML is default, load default plugins by absolute paths + _impl->register_plugins_in_registry(xmlConfigFile_, xmlConfigFile.empty()); + // Load plugins from pre-compiled list + _impl->register_compile_time_plugins(); } std::map Core::GetVersions(const std::string& deviceName) const { From 8eb142ca6ead9ac08e15741d554e7bb061339e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Wed, 22 Mar 2023 17:00:53 +0100 Subject: [PATCH 041/296] Interpolate v11 -> v4 downgrade transformation (#16448) --- .../convert_interpolate11_downgrade.hpp | 24 +++ .../common_optimizations.cpp | 2 + .../convert_interpolate11_downgrade.cpp | 75 +++++++++ .../convert_interpolate11_downgrade_test.cpp | 147 ++++++++++++++++++ src/core/src/op/interpolate.cpp | 15 ++ src/core/tests/type_prop/interpolate.cpp | 22 +++ 6 files changed, 285 insertions(+) create mode 100644 src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp create mode 100644 src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp create mode 100644 src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp diff --git a/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp new file mode 100644 index 00000000000000..b112c5d8abdf45 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ov { +namespace pass { +/** + * @ingroup ie_transformation_common_api + * @brief Converts Interpolate version 11 to Interpolate version 4 if the new op uses any of the v4 allowed + * interpolation modes. 
+ */
+class TRANSFORMATIONS_API ConvertInterpolate11ToInterpolate4 : public MatcherPass {
+public:
+    OPENVINO_RTTI("ConvertInterpolate11ToInterpolate4", "0");
+    ConvertInterpolate11ToInterpolate4();
+};
+
+}  // namespace pass
+}  // namespace ov
diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
index 8b43dcfc8d2b29..6064effe880c4b 100644
--- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
@@ -76,6 +76,7 @@
 #include "transformations/op_conversions/convert_gather_downgrade.hpp"
 #include "transformations/op_conversions/convert_gather_upgrade.hpp"
 #include "transformations/op_conversions/convert_gelu.hpp"
+#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp"
 #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
 #include "transformations/op_conversions/convert_maxpool_downgrade.hpp"
 #include "transformations/op_conversions/convert_maxpool_upgrade.hpp"
@@ -211,6 +212,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model>& f) {
+    REGISTER_PASS(manager, ConvertInterpolate11ToInterpolate4)
 
     auto fq_fusions = manager.register_pass<GraphRewrite>();
     ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion)
diff --git a/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp
new file mode 100644
index 00000000000000..c9b2e15dd4cfaf
--- /dev/null
+++ b/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp
@@ -0,0 +1,75 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "itt.hpp"
+
+ov::pass::ConvertInterpolate11ToInterpolate4::ConvertInterpolate11ToInterpolate4() {
+    MATCHER_SCOPE(ConvertInterpolate11ToInterpolate4);
+
+    const auto interpolate_v11_pattern = pattern::wrap_type<ov::opset11::Interpolate>();
+
+    const matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        const auto v4_compatible_interpolation_mode = [](const op::util::InterpolateBase::InterpolateMode mode) {
+            constexpr std::array<op::util::InterpolateBase::InterpolateMode, 4> allowed_modes = {
+                op::util::InterpolateBase::InterpolateMode::NEAREST,
+                op::util::InterpolateBase::InterpolateMode::LINEAR,
+                op::util::InterpolateBase::InterpolateMode::LINEAR_ONNX,
+                op::util::InterpolateBase::InterpolateMode::CUBIC};
+
+            return std::find(std::begin(allowed_modes), std::end(allowed_modes), mode) != std::end(allowed_modes);
+        };
+
+        const auto interpolate_v11 = std::dynamic_pointer_cast<ov::opset11::Interpolate>(m.get_match_root());
+        if (!interpolate_v11 || !v4_compatible_interpolation_mode(interpolate_v11->get_attrs().mode) ||
+            transformation_callback(interpolate_v11)) {
+            return false;
+        }
+
+        // downgrade only if the interpolation mode used to create v11 is supported by v4
+        std::shared_ptr<ov::Node> interpolate_v4;
+        ov::Output<ov::Node> v4_input_output_shape;
+        ov::Output<ov::Node> v4_input_scales;
+
+        if (interpolate_v11->get_attrs().shape_calculation_mode ==
+            ov::op::util::InterpolateBase::ShapeCalcMode::SCALES) {
+            v4_input_scales = interpolate_v11->input_value(1);
+            v4_input_output_shape = opset4::Constant::create(element::i32, Shape{1}, {1});
+            copy_runtime_info(interpolate_v11, v4_input_output_shape.get_node_shared_ptr());
+        } else {
v4_input_output_shape = interpolate_v11->input_value(1); + v4_input_scales = opset4::Constant::create(element::f32, Shape{1}, {1.0f}); + copy_runtime_info(interpolate_v11, v4_input_scales.get_node_shared_ptr()); + } + + if (interpolate_v11->get_input_size() == 3) { // with axes input + interpolate_v4 = std::make_shared(interpolate_v11->input_value(0), + v4_input_output_shape, + v4_input_scales, + interpolate_v11->input_value(2), + interpolate_v11->get_attrs()); + } else { + interpolate_v4 = std::make_shared(interpolate_v11->input_value(0), + v4_input_output_shape, + v4_input_scales, + interpolate_v11->get_attrs()); + } + + interpolate_v4->set_friendly_name(interpolate_v11->get_friendly_name()); + copy_runtime_info(interpolate_v11, interpolate_v4); + replace_node(interpolate_v11, interpolate_v4); + + return true; + }; + + auto m = std::make_shared(interpolate_v11_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp b/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp new file mode 100644 index 00000000000000..7504cd378ebba6 --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp @@ -0,0 +1,147 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; + +namespace { +constexpr bool WITH_AXES = true; +constexpr bool WITHOUT_AXES = false; + +std::shared_ptr create_v11_model(const bool with_axes, + const ov::opset11::Interpolate::ShapeCalcMode shape_calc_mode) { + auto attributes = ov::opset11::Interpolate::InterpolateAttrs{}; + attributes.shape_calculation_mode = shape_calc_mode; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + std::shared_ptr scales_or_sizes; + std::shared_ptr interpolate; + + const size_t num_scales_or_sizes = with_axes ? 2 : 4; + if (shape_calc_mode == ov::opset11::Interpolate::ShapeCalcMode::SCALES) { + scales_or_sizes = std::make_shared(ov::element::f32, ov::Shape{num_scales_or_sizes}); + } else { + scales_or_sizes = std::make_shared(ov::element::i32, ov::Shape{num_scales_or_sizes}); + } + + ov::ParameterVector model_params; + model_params.push_back(input); + model_params.push_back(scales_or_sizes); + if (with_axes) { + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + model_params.push_back(axes); + interpolate = std::make_shared(input, scales_or_sizes, axes, attributes); + } else { + interpolate = std::make_shared(input, scales_or_sizes, attributes); + } + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), model_params); +} + +std::shared_ptr create_v4_model(const bool with_axes, + const ov::opset4::Interpolate::ShapeCalcMode shape_calc_mode) { + auto attributes = ov::opset4::Interpolate::InterpolateAttrs{}; + attributes.shape_calculation_mode = shape_calc_mode; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + std::shared_ptr output_shape; + std::shared_ptr scales; + std::shared_ptr interpolate; + + ov::ParameterVector model_params; + model_params.push_back(input); + + const size_t num_scales_or_sizes = with_axes ? 
2 : 4; + if (shape_calc_mode == ov::opset4::Interpolate::ShapeCalcMode::SCALES) { + scales = std::make_shared(ov::element::f32, ov::Shape{num_scales_or_sizes}); + model_params.push_back(std::dynamic_pointer_cast(scales)); + output_shape = ov::opset4::Constant::create(ov::element::i32, ov::Shape{1}, {1}); + + } else { + output_shape = std::make_shared(ov::element::i32, ov::Shape{num_scales_or_sizes}); + model_params.push_back(std::dynamic_pointer_cast(output_shape)); + scales = ov::opset4::Constant::create(ov::element::f32, ov::Shape{1}, {1.0f}); + } + + if (with_axes) { + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + model_params.push_back(axes); + interpolate = std::make_shared(input, output_shape, scales, axes, attributes); + } else { + interpolate = std::make_shared(input, output_shape, scales, attributes); + } + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), model_params); +} + +} // namespace + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_scales) { + manager.register_pass(); + function = create_v11_model(WITH_AXES, ov::opset11::Interpolate::ShapeCalcMode::SCALES); + function_ref = create_v4_model(WITH_AXES, ov::opset4::Interpolate::ShapeCalcMode::SCALES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_sizes) { + manager.register_pass(); + function = create_v11_model(WITH_AXES, ov::opset11::Interpolate::ShapeCalcMode::SIZES); + function_ref = create_v4_model(WITH_AXES, ov::opset4::Interpolate::ShapeCalcMode::SIZES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_scales_no_axes) { + manager.register_pass(); + function = create_v11_model(WITHOUT_AXES, ov::opset11::Interpolate::ShapeCalcMode::SCALES); + function_ref = create_v4_model(WITHOUT_AXES, ov::opset4::Interpolate::ShapeCalcMode::SCALES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_sizes_no_axes) { + manager.register_pass(); + function = create_v11_model(WITHOUT_AXES, ov::opset11::Interpolate::ShapeCalcMode::SIZES); + function_ref = create_v4_model(WITHOUT_AXES, ov::opset4::Interpolate::ShapeCalcMode::SIZES); +} + +namespace { +std::shared_ptr create_non_downgradeable_model(const ov::opset11::Interpolate::InterpolateMode mode) { + auto attributes = ov::opset11::Interpolate::InterpolateAttrs{}; + attributes.mode = mode; + attributes.shape_calculation_mode = ov::opset11::Interpolate::ShapeCalcMode::SCALES; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + const auto scales = std::make_shared(ov::element::f32, ov::Shape{2}); + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + + const auto interpolate = std::make_shared(input, scales, axes, attributes); + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), ov::ParameterVector{input, scales, axes}); +} +} // namespace + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_bicubic_pillow) { + function = create_non_downgradeable_model(ov::opset11::Interpolate::InterpolateMode::BICUBIC_PILLOW); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_bilinear_pillow) { + function = create_non_downgradeable_model(ov::opset11::Interpolate::InterpolateMode::BILINEAR_PILLOW); + manager.register_pass(); +} diff --git a/src/core/src/op/interpolate.cpp b/src/core/src/op/interpolate.cpp index 
6bfd961fc35de8..b34d39bc60ec63 100644 --- a/src/core/src/op/interpolate.cpp +++ b/src/core/src/op/interpolate.cpp @@ -186,6 +186,21 @@ void ov::op::v4::Interpolate::validate_and_infer_types() { input_shapes = {input_shape, target_spatial_shape, scales, axes}; } + const auto interpolation_mode_check = [](const op::util::InterpolateBase::InterpolateMode mode) { + constexpr std::array allowed_modes = { + op::util::InterpolateBase::InterpolateMode::NEAREST, + op::util::InterpolateBase::InterpolateMode::LINEAR, + op::util::InterpolateBase::InterpolateMode::LINEAR_ONNX, + op::util::InterpolateBase::InterpolateMode::CUBIC}; + + return std::find(std::begin(allowed_modes), std::end(allowed_modes), mode) != std::end(allowed_modes); + }; + + NODE_VALIDATION_CHECK(this, + interpolation_mode_check(m_attrs.mode), + "Unsupported interpolation mode used with version 4 of the Interpolate op: ", + as_string(m_attrs.mode)); + util::correct_pads_attr(this, m_attrs.pads_begin, m_attrs.pads_end, input_shapes); shape_infer(this, m_attrs.pads_begin, m_attrs.pads_end, input_shapes, output_shapes, {}); set_output_type(0, get_input_element_type(0), output_shapes[0]); diff --git a/src/core/tests/type_prop/interpolate.cpp b/src/core/tests/type_prop/interpolate.cpp index b220ecd8a8f754..7f0f5ff3a5bb68 100644 --- a/src/core/tests/type_prop/interpolate.cpp +++ b/src/core/tests/type_prop/interpolate.cpp @@ -214,6 +214,28 @@ TEST(type_prop, interpolate_v4_interval_logic) { ASSERT_TRUE(interp->get_output_partial_shape(0).same_scheme(out_shape)); } +TEST(type_prop, interpolate_v4_incorrect_mode) { + const auto image = std::make_shared(element::f32, Shape{1, 3, 30, 60}); + const auto target_shape = std::make_shared(element::i32, Shape{2}); + const auto scales = op::Constant::create(element::f32, Shape{2}, {6.f, 12.f}); + const auto axes = op::Constant::create(element::i64, Shape{2}, {2, 3}); + + ov::op::util::InterpolateBase::InterpolateAttrs attrs; + attrs.shape_calculation_mode = ov::op::util::InterpolateBase::ShapeCalcMode::SCALES; + attrs.mode = ov::op::util::InterpolateBase::InterpolateMode::BICUBIC_PILLOW; + attrs.pads_begin = {0, 0, 0, 0}; + attrs.pads_end = {0, 0, 0, 0}; + + OV_EXPECT_THROW(auto interp = std::make_shared(image, target_shape, scales, axes, attrs), + ov::NodeValidationFailure, + HasSubstr("Unsupported interpolation mode used with version 4 of the Interpolate op")); + + attrs.mode = ov::op::util::InterpolateBase::InterpolateMode::BILINEAR_PILLOW; + OV_EXPECT_THROW(auto interp = std::make_shared(image, target_shape, scales, axes, attrs), + ov::NodeValidationFailure, + HasSubstr("Unsupported interpolation mode used with version 4 of the Interpolate op")); +} + TEST(type_prop, interpolate_v11_scales) { const auto image = std::make_shared(element::f32, Shape{1, 3, 30, 60}); const auto scales = op::Constant::create(element::f32, Shape{2}, {0.2f, 0.2f}); From 6ac5e42b62ef065ecf6b553b2a9d662cea5a5b03 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Wed, 22 Mar 2023 20:07:47 +0400 Subject: [PATCH 042/296] [CONFORMANCE] Fix if impossible to remove log (#16485) * fix_reporting * w/a for remove * Update merge_xmls.py remove extra --- .../functional_test_utils/layer_tests_summary/merge_xmls.py | 3 +-- .../layer_tests_summary/run_parallel.py | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py index e32ecf275b846a..5e1e8d01779363 100644 
--- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py @@ -70,7 +70,7 @@ def aggregate_test_results(aggregated_results: SubElement, xml_reports: list, re aggregated_device_results = aggregated_results.find(xml_device_entry.tag) if aggregated_device_results is None: aggregated_results.append(xml_device_entry) - continue + aggregated_device_results = aggregated_results.find(xml_device_entry.tag) # op or api_type for xml_results_entry in xml_device_entry: aggregated_results_entry = aggregated_device_results.find(xml_results_entry.tag) @@ -88,7 +88,6 @@ def aggregate_test_results(aggregated_results: SubElement, xml_reports: list, re aggregated_results_entry.append(xml_real_device_entry) continue update_result_node(xml_real_device_entry, aggregated_real_device_api_report) - a = 1 return aggregated_timestamp diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py index 52cad8139552cc..62e7111372ea3f 100644 --- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py @@ -199,7 +199,10 @@ def __replace_restricted_symbols(input_string:str): def __get_test_list_by_runtime(self): test_list_file_name = os.path.join(self._working_dir, "test_list.lst") if os.path.isfile(test_list_file_name): - os.remove(test_list_file_name) + try: + os.remove(test_list_file_name) + except Exception as err: + logger.warning(f"Imposible to remove {test_list_file_name}. Error: {err}") command_to_get_test_list = self._command + f' --gtest_list_tests >> {test_list_file_name}' logger.info(f"Get test list using command: {command_to_get_test_list}") run_res = run(command_to_get_test_list, check=True, shell=True) From 5290822f8b0b3df91a083649867fbbe45c51cc78 Mon Sep 17 00:00:00 2001 From: Yury Gaydaychuk Date: Wed, 22 Mar 2023 17:36:05 +0100 Subject: [PATCH 043/296] [CPU] Enabled BatchToSpace and SpaceToBatch with nonconstant inputs support (#16344) --- .../intel_cpu/src/nodes/batch_to_space.cpp | 46 +++-- .../intel_cpu/src/nodes/batch_to_space.h | 1 + .../intel_cpu/src/nodes/space_to_batch.cpp | 45 +++-- .../intel_cpu/src/nodes/space_to_batch.h | 1 + .../single_layer_tests/batch_to_space.cpp | 189 +++++++++++++----- .../single_layer_tests/space_to_batch.cpp | 167 +++++++++++++--- 6 files changed, 331 insertions(+), 118 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp index 9aab08d021fecb..804f79d507d70d 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp @@ -24,12 +24,6 @@ bool BatchToSpace::isSupportedOperation(const std::shared_ptr(op->get_input_node_shared_ptr(1)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)) == nullptr) { - errorMessage = "Only constant 'block_shape', 'crops_begin', 'crops_end' are supported"; - return false; - } } catch (...) 
{ return false; } @@ -54,9 +48,6 @@ BatchToSpace::BatchToSpace(const std::shared_ptr& op, const GraphC IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << inDims.size(); if (inDims.size() != outDims.size()) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions"; - - blockShapeIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1))->cast_vector(); - cropsBeginIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2))->cast_vector(); } void BatchToSpace::initSupportedPrimitiveDescriptors() { @@ -70,30 +61,30 @@ void BatchToSpace::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); addSupportedPrimDesc({{LayoutType::nspc, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); addSupportedPrimDesc({{LayoutType::ncsp, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 8 == 0) { addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 16 == 0) { addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } @@ -112,6 +103,19 @@ static std::vector getShape5D(const SizeVector &shape) { template void BatchToSpace::batchToSpaceKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank(); + blockShapeIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + blockShapeIn.push_back(*(blockShapesPtr + i)); + } + + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + cropsBeginIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + cropsBeginIn.push_back(*(padsBeginPtr + i)); + } + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto &inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.h b/src/plugins/intel_cpu/src/nodes/batch_to_space.h index 8d3ff90d3a7b20..430893f4689060 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.h +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.h @@ -24,6 +24,7 @@ class BatchToSpace : public Node { bool created() const override; bool needPrepareParams() const override { return false; }; + bool needShapeInfer() const override {return true;}; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git 
a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 3389053df33501..4136724303f40f 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -26,12 +26,6 @@ bool SpaceToBatch::isSupportedOperation(const std::shared_ptr(op->get_input_node_shared_ptr(1)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)) == nullptr) { - errorMessage = "Only constant 'block_shape', 'pads_begin', 'pads_end' are supported"; - return false; - } } catch (...) { return false; } @@ -56,8 +50,6 @@ SpaceToBatch::SpaceToBatch(const std::shared_ptr& op, const GraphC IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << srcRank; if (srcRank != dstRank) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions"; - blockShapeIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1))->cast_vector(); - padsBeginIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2))->cast_vector(); } void SpaceToBatch::initSupportedPrimitiveDescriptors() { @@ -71,30 +63,30 @@ void SpaceToBatch::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); addSupportedPrimDesc({{LayoutType::nspc, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); addSupportedPrimDesc({{LayoutType::ncsp, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 8 == 0) { addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 16 == 0) { addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } @@ -112,6 +104,19 @@ static std::vector getShape5D(const SizeVector &shape) { template void SpaceToBatch::SpaceToBatchKernel() { + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank(); + blockShapeIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + blockShapeIn.push_back(*(blockShapesPtr + i)); + } + + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + padsBeginIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + padsBeginIn.push_back(*(padsBeginPtr + i)); + } + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.h 
b/src/plugins/intel_cpu/src/nodes/space_to_batch.h index ab7bb40b7c97a2..ccfa0d853d4be2 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.h +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.h @@ -24,6 +24,7 @@ class SpaceToBatch : public Node { bool created() const override; bool needPrepareParams() const override { return false; }; + bool needShapeInfer() const override {return true;}; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp index 8f62cc50b342fb..aa248f85e29447 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include "shared_test_classes/base/ov_subgraph.hpp" #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" @@ -13,6 +14,11 @@ using namespace ov::test; namespace CPULayerTestsDefinitions { +namespace { + std::vector blockShape, cropsBegin, cropsEnd; + ngraph::Shape paramShape; +} // namespace + using BatchToSpaceLayerTestCPUParams = std::tuple< std::vector, // Input shapes std::vector, // block shape @@ -26,7 +32,6 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface &obj) { std::vector inputShapes; - std::vector blockShape, cropsBegin, cropsEnd; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = obj.param; @@ -53,21 +58,51 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); i++) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (i == 0) { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } else if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < blockShape.size(); j++) { + dataPtr[j] = blockShape[j]; + } + } else if (i == 2) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < cropsBegin.size(); j++) { + dataPtr[j] = cropsBegin[j]; + } + } else if (i == 3) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < cropsEnd.size(); j++) { + dataPtr[j] = cropsEnd[j]; + } + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } + protected: void SetUp() override { targetDevice = CommonTestUtils::DEVICE_CPU; std::vector inputShapes; - std::vector blockShape, cropsBegin, cropsEnd; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - inType = outType = ngPrec; - init_input_shapes(inputShapes); + const std::vector inputShapesVec{inputShapes}; + init_input_shapes(inputShapesVec); if (strcmp(netPrecision.name(), "U8") == 0) selectedType = std::string("ref_any_") 
+ "I8"; @@ -76,9 +111,21 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface(params)); - auto b2s = ngraph::builder::makeBatchToSpace(paramOuts[0], ngPrec, blockShape, cropsBegin, cropsEnd); - b2s->get_rt_info() = getCPUInfo(); - ngraph::ResultVector results{std::make_shared(b2s)}; + paramShape = {paramOuts[0].get_partial_shape().size()}; + + std::shared_ptr in2, in3, in4; + auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); + in2 = blockShapeParam; + params.push_back(blockShapeParam); + auto cropsBeginParam = std::make_shared(ngraph::element::i64, paramShape); + params.push_back(cropsBeginParam); + in3 = cropsBeginParam; + auto cropsEndParam = std::make_shared(ngraph::element::i64, paramShape); + params.push_back(cropsEndParam); + in4 = cropsEndParam; + auto btsNode = std::make_shared(paramOuts[0], in2, in3, in4); + btsNode->get_rt_info() = getCPUInfo(); + ngraph::ResultVector results{std::make_shared(btsNode)}; function = std::make_shared(results, params, "BatchToSpace"); } }; @@ -103,20 +150,31 @@ const std::vector> cropsBegin4D1 = {{0, 0, 0, 0}, {0, 0, 0 const std::vector> cropsEnd4D1 = {{0, 0, 0, 0}, {0, 0, 1, 0}, {0, 0, 1, 1}}; std::vector> staticInputShapes4D1 = { - {{8, 16, 10, 10}} + {{8, 16, 10, 10}, {4}, {4}, {4}} }; std::vector> dynamicInputShapes4D1 = { - { - {{{-1, -1, -1, -1}, {{8, 8, 6, 7}, {4, 10, 5, 5}, {12, 9, 7, 5}}}}, - {{{{4, 12}, {8, 16}, 6, -1}, {{8, 8, 6, 7}, {4, 10, 6, 5}, {12, 9, 6, 5}}}} - } + { + {{-1, -1, -1, -1}, {{8, 8, 6, 7}, {4, 10, 5, 5}, {12, 9, 7, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{{4, 12}, {8, 16}, 6, -1}, {{8, 8, 6, 7}, {4, 10, 6, 5}, {12, 9, 6, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; std::vector> dynamicInputShapes4D1Blocked = { - { - {{{-1, 16, -1, -1}, {{4, 16, 5, 8}, {8, 16, 7, 6}, {12, 16, 4, 5}}}} - } + { + {{-1, 16, -1, -1}, {{4, 16, 5, 8}, {8, 16, 7, 6}, {12, 16, 4, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; const std::vector> blockShape4D2 = {{1, 2, 3, 4}, {1, 3, 4, 2}}; @@ -124,20 +182,31 @@ const std::vector> cropsBegin4D2 = {{0, 0, 0, 1}, {0, 0, 1 const std::vector> cropsEnd4D2 = {{0, 0, 1, 0}, {0, 0, 3, 1}}; std::vector> staticInputShapes4D2 = { - {{24, 16, 7, 8}} + {{24, 16, 7, 8}, {4}, {4}, {4}} }; std::vector> dynamicInputShapes4D2 = { - { - {{{-1, -1, -1, -1}, {{48, 4, 7, 8}, {24, 8, 6, 7}, {24, 16, 5, 5}}}}, - {{{24, {4, 10}, -1, -1}, {{24, 8, 6, 7}, {24, 6, 7, 5}, {24, 4, 5, 5}}}} - } + { + {{-1, -1, -1, -1}, {{48, 4, 7, 8}, {24, 8, 6, 7}, {24, 16, 5, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{24, {4, 10}, -1, -1}, {{24, 8, 6, 7}, {24, 6, 7, 5}, {24, 4, 5, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; std::vector> dynamicInputShapes4D2Blocked = { - { - {{-1, 16, -1, -1}, {{24, 16, 5, 5}, {24, 16, 6, 7}, {48, 16, 4, 4}}} - } + { + {{-1, 16, -1, -1}, {{24, 16, 5, 5}, {24, 16, 6, 7}, {48, 16, 4, 4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; const std::vector cpuParamsWithBlock_4D = { @@ -223,20 +292,32 @@ const std::vector> cropsBegin5D1 = {{0, 0, 0, 0, 0}, {0, 0 const std::vector> cropsEnd5D1 = {{0, 0, 0, 0, 0}, {0, 0, 1, 0, 1}}; std::vector> staticInputShapes5D1 = { - {{8, 16, 4, 10, 10}} + {{8, 16, 4, 10, 10}, {5}, {5}, {5}} }; + std::vector> dynamicInputShapes5D1 = { - { - {{{-1, -1, -1, -1, -1}, 
{{8, 16, 4, 10, 10}, {16, 10, 5, 11, 9}, {24, 6, 6, 8, 8}}}}, - {{{{8, 16}, {8, 16}, {2, 7}, -1, -1}, {{8, 16, 2, 6, 8}, {8, 10, 4, 7, 5}, {16, 8, 7, 5, 10}}}} - } + { + {{-1, -1, -1, -1, -1}, {{8, 16, 4, 10, 10}, {16, 10, 5, 11, 9}, {24, 6, 6, 8, 8}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + {{{8, 16}, {8, 16}, {2, 7}, -1, -1}, {{8, 16, 2, 6, 8}, {8, 10, 4, 7, 5}, {16, 8, 7, 5, 10}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; std::vector> dynamicInputShapes5D1Blocked = { - { - {{{-1, 16, -1, -1, -1}, {{24, 16, 3, 6, 7}, {48, 16, 4, 5, 5}, {24, 16, 5, 8, 5}}}} - } + { + {{-1, 16, -1, -1, -1}, {{24, 16, 3, 6, 7}, {48, 16, 4, 5, 5}, {24, 16, 5, 8, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; const std::vector> blockShape5D2 = {{1, 2, 4, 3, 1}, {1, 1, 2, 4, 3}}; @@ -244,29 +325,43 @@ const std::vector> cropsBegin5D2 = {{0, 0, 1, 2, 0}, {0, 0 const std::vector> cropsEnd5D2 = {{0, 0, 1, 0, 1}, {0, 0, 1, 1, 1}}; std::vector> staticInputShapes5D2 = { - {{48, 16, 3, 3, 3}} + {{48, 16, 3, 3, 3}, {5}, {5}, {5}} }; std::vector> dynamicInputShapes5D2 = { + { + {{-1, -1, -1, -1, -1}, {{48, 4, 3, 3, 3}, {24, 16, 5, 3, 5}, {24, 8, 7, 5, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + {{24, {8, 16}, {3, 5}, -1, -1}, {{24, 16, 3, 4, 3}, {24, 12, 5, 3, 5}, {24, 8, 4, 5, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + // special case + {{{1, 24}, {1, 16}, {1, 10}, {1, 10}, {1, 10}}, { - {{{-1, -1, -1, -1, -1}, {{48, 4, 3, 3, 3}, {24, 16, 5, 3, 5}, {24, 8, 7, 5, 5}}}}, - {{{24, {8, 16}, {3, 5}, -1, -1}, {{24, 16, 3, 4, 3}, {24, 12, 5, 3, 5}, {24, 8, 4, 5, 5}}}}, - // special case - { - {{{1, 24}, {1, 16}, {1, 10}, {1, 10}, {1, 10}}, - { - {24, 16, 5, 3, 5}, - {24, 16, 5, 3, 5}, - {24, 16, 7, 5, 5} - }} - } - } + {24, 16, 5, 3, 5}, + {24, 16, 5, 3, 5}, + {24, 16, 7, 5, 5} + }}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; std::vector> dynamicInputShapes5D2Blocked = { - { - {{{-1, 16, -1, -1, -1}, {{24, 16, 4, 5, 5}, {48, 16, 3, 4, 3}, {24, 16, 5, 3, 5}}}} - } + { + {{-1, 16, -1, -1, -1}, {{24, 16, 4, 5, 5}, {48, 16, 3, 4, 3}, {24, 16, 5, 3, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; const std::vector cpuParamsWithBlock_5D = { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp index 01edafe13aeda3..4cbf4379033a6f 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include "shared_test_classes/base/ov_subgraph.hpp" #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" @@ -13,8 +14,13 @@ using namespace ov::test; namespace CPULayerTestsDefinitions { +namespace { + std::vector blockShape, padsBegin, padsEnd; + ngraph::Shape paramShape; +} // namespace + using SpaceToBatchLayerTestCPUParams = std::tuple< - InputShape, // Input shapes + std::vector, // Input shapes std::vector, // block shape std::vector, // pads begin std::vector, // pads end @@ -25,21 +31,24 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface &obj) { - InputShape inputShapes; - std::vector 
blockShape, padsBegin, padsEnd; + std::vector inputShapes; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, padsBegin, padsEnd, netPrecision, cpuParams) = obj.param; std::ostringstream result; - if (inputShapes.first.size() != 0) { + if (inputShapes.front().first.size() != 0) { result << "IS=("; - result << CommonTestUtils::partialShape2str(std::vector{inputShapes.first}) << "_"; + for (const auto &shape : inputShapes) { + result << CommonTestUtils::partialShape2str({shape.first}) << "_"; + } result.seekp(-1, result.cur); result << ")_"; } result << "TS="; - for (const auto &item : inputShapes.second) { - result << CommonTestUtils::vec2str(item) << "_"; + for (const auto& shape : inputShapes) { + for (const auto& item : shape.second) { + result << CommonTestUtils::vec2str(item) << "_"; + } } result << "blockShape=" << CommonTestUtils::vec2str(blockShape) << "_"; result << "padsBegin=" << CommonTestUtils::vec2str(padsBegin) << "_"; @@ -49,19 +58,47 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); i++) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (i == 0) { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } else if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < blockShape.size(); j++) { + dataPtr[j] = blockShape[j]; + } + } else if (i == 2) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < padsBegin.size(); j++) { + dataPtr[j] = padsBegin[j]; + } + } else if (i == 3) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < padsEnd.size(); j++) { + dataPtr[j] = padsEnd[j]; + } + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } + protected: void SetUp() override { targetDevice = CommonTestUtils::DEVICE_CPU; - - InputShape inputShapes; - std::vector blockShape, padsBegin, padsEnd; + std::vector inputShapes; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, padsBegin, padsEnd, netPrecision, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - inType = outType = ngPrec; const std::vector inputShapesVec{inputShapes}; init_input_shapes(inputShapesVec); @@ -72,7 +109,20 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface(params)); - auto s2b = ngraph::builder::makeSpaceToBatch(paramOuts[0], ngPrec, blockShape, padsBegin, padsEnd); + paramShape = {paramOuts[0].get_partial_shape().size()}; + + std::shared_ptr in2, in3, in4; + auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); + in2 = blockShapeParam; + params.push_back(blockShapeParam); + auto padsBeginParam = std::make_shared(ngraph::element::i64, paramShape); + in3 = padsBeginParam; + params.push_back(padsBeginParam); + auto padsEndParam = std::make_shared(ngraph::element::i64, paramShape); + in4 = padsEndParam; + params.push_back(padsEndParam); + + auto s2b = std::make_shared(paramOuts[0], in2, in3, in4); function = makeNgraphFunction(inType, params, s2b, "SpaceToBatchCPU"); } }; @@ -96,29 +146,67 @@ 
const std::vector> blockShape4D1 = {{1, 2, 1, 2}, {1, 1, 2, const std::vector> padsBegin4D1 = {{0, 0, 0, 1}, {0, 0, 2, 1}, {0, 0, 4, 3}}; const std::vector> padsEnd4D1 = {{0, 0, 0, 1}, {0, 0, 4, 1}, {0, 0, 2, 3}}; -std::vector staticInputShapes4D1 = {{1, 16, 8, 12}, {1, 32, 8, 8}}; +std::vector> staticInputShapes4D1 = { + {{1, 16, 8, 12}, {4}, {4}, {4}}, + {{1, 32, 8, 8}, {4}, {4}, {4}}, +}; -std::vector dynamicInputShapes4D1 = { - {{-1, -1, -1, -1}, {{1, 6, 4, 8}, {2, 4, 8, 10}, {1, 8, 4, 10}}}, - {{{1, 4}, {2, 16}, 6, -1}, {{4, 8, 6, 4}, {1, 6, 6, 8}, {2, 12, 6, 4}}} +std::vector> dynamicInputShapes4D1 = { + { + {{-1, -1, -1, -1}, {{1, 6, 4, 8}, {2, 4, 8, 10}, {1, 8, 4, 10}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{{1, 4}, {2, 16}, 6, -1}, {{4, 8, 6, 4}, {1, 6, 6, 8}, {2, 12, 6, 4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; -std::vector dynamicInputShapes4D1Blocked = { - {{-1, 16, -1, -1}, {{1, 16, 4, 6}, {2, 16, 6, 6}, {4, 16, 4, 8}}} +std::vector> dynamicInputShapes4D1Blocked = { + { + {{-1, 16, -1, -1}, {{1, 16, 4, 6}, {2, 16, 6, 6}, {4, 16, 4, 8}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; + const std::vector> blockShape4D2 = { {1, 2, 4, 3}, {1, 4, 4, 1}}; const std::vector> padsBegin4D2 = {{0, 0, 0, 0}, {0, 0, 4, 3}}; const std::vector> padsEnd4D2 = {{0, 0, 4, 0}, {0, 0, 4, 3}}; -std::vector staticInputShapes4D2 = {{1, 16, 12, 12}, {1, 32, 12, 15}}; -std::vector dynamicInputShapes4D2 = { - {{-1, -1, -1, -1}, {{1, 4, 8, 9}, {2, 8, 12, 9}, {6, 12, 4, 12}}}, - {{2, {4, 16}, -1, -1}, {{2, 8, 4, 9}, {2, 4, 8, 6}, {2, 12, 12, 3}}} +std::vector> staticInputShapes4D2 = { + {{1, 16, 12, 12}, {4}, {4}, {4}}, + {{1, 32, 12, 15}, {4}, {4}, {4}}, }; -std::vector dynamicInputShapes4D2Blocked = { - {{-1, 16, -1, -1}, {{2, 16, 4, 15}, {2, 16, 8, 12}, {3, 16, 12, 9}}} +std::vector> dynamicInputShapes4D2 = { + { + {{-1, -1, -1, -1}, {{1, 4, 8, 9}, {2, 8, 12, 9}, {6, 12, 4, 12}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{2, {4, 16}, -1, -1}, {{2, 8, 4, 9}, {2, 4, 8, 6}, {2, 12, 12, 3}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } +}; + +std::vector> dynamicInputShapes4D2Blocked = { + { + {{-1, 16, -1, -1}, {{2, 16, 4, 15}, {2, 16, 8, 12}, {3, 16, 12, 9}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; const std::vector cpuParamsWithBlock_4D = { @@ -203,15 +291,34 @@ const std::vector> blockShape5D = {{1, 1, 2, 2, 1}, {1, 2, const std::vector> padsBegin5D = {{0, 0, 0, 0, 0}, {0, 0, 4, 0, 0}, {0, 0, 0, 2, 3}}; const std::vector> padsEnd5D = {{0, 0, 0, 0, 0}, {0, 0, 0, 4, 3}, {0, 0, 4, 2, 3}}; -std::vector staticInputShapes5D = {{2, 16, 4, 6, 12}, {1, 32, 8, 8, 6}, {1, 16, 4, 12, 12}}; +std::vector> staticInputShapes5D = { + {{2, 16, 4, 6, 12}, {5}, {5}, {5}}, + {{1, 32, 8, 8, 6}, {5}, {5}, {5}}, + {{1, 16, 4, 12, 12}, {5}, {5}, {5}} +}; -std::vector dynamicInputShapes5D = { - {{-1, -1, -1, -1, -1}, {{2, 2, 12, 4, 15}, {4, 4, 8, 6, 9}, {3, 6, 4, 2, 12}}}, - {{{1, 10}, {2, 20}, {4, 50}, -1, -1}, {{3, 12, 8, 6, 9}, {5, 10, 4, 8, 15}, {6, 8, 20, 4, 12}}} +std::vector> dynamicInputShapes5D = { + { + {{-1, -1, -1, -1, -1}, {{2, 2, 12, 4, 15}, {4, 4, 8, 6, 9}, {3, 6, 4, 2, 12}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + {{{1, 10}, {2, 20}, {4, 50}, -1, -1}, {{3, 12, 8, 6, 9}, {5, 10, 4, 8, 15}, {6, 8, 20, 
4, 12}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}}
+    }
 };

-std::vector<InputShape> dynamicInputShapes5DBlocked = {
-    {{-1, 16, -1, -1, -1}, {{2, 16, 4, 6, 9}, {5, 16, 16, 4, 6}, {7, 16, 8, 2, 3}}}
+std::vector<std::vector<InputShape>> dynamicInputShapes5DBlocked = {
+    {
+        {{-1, 16, -1, -1, -1}, {{2, 16, 4, 6, 9}, {5, 16, 16, 4, 6}, {7, 16, 8, 2, 3}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}}
+    }
 };

 const std::vector<CPUSpecificParams> cpuParamsWithBlock_5D = {
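
With the constant-input restriction removed, SpaceToBatch and BatchToSpace accept
block_shape and pads/crops as ordinary graph inputs, and the CPU kernels re-read
their values from input memory on every execution. A minimal Python sketch of such
a graph (hypothetical shapes and values; assumes the standard OpenVINO Python API,
as exercised by the layer tests above):

    import numpy as np
    import openvino.runtime.opset10 as ov
    from openvino.runtime import Core, Model

    # All four SpaceToBatch inputs are Parameters, so none of them can be
    # constant-folded; the plugin must fetch the values at execution time.
    data = ov.parameter([2, 16, 4, 6, 12], np.float32, name="data")
    block = ov.parameter([5], np.int64, name="block_shape")
    pads_begin = ov.parameter([5], np.int64, name="pads_begin")
    pads_end = ov.parameter([5], np.int64, name="pads_end")
    s2b = ov.space_to_batch(data, block, pads_begin, pads_end)
    model = Model([s2b], [data, block, pads_begin, pads_end], "SpaceToBatch")

    compiled = Core().compile_model(model, "CPU")
    # Inputs are keyed by index; block/pads values may change per inference.
    out = compiled.infer_new_request({
        0: np.random.rand(2, 16, 4, 6, 12).astype(np.float32),
        1: np.array([1, 1, 2, 2, 1], dtype=np.int64),  # block_shape
        2: np.zeros(5, dtype=np.int64),                # pads_begin
        3: np.zeros(5, dtype=np.int64),                # pads_end
    })

From 951c5fdae9fcd081954517ed76280962f52614d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?=
Date: Wed, 22 Mar 2023 19:12:16 +0100
Subject: [PATCH 044/296] Interpolate 11 exposed to Python (#16465)

---
 docs/api/ie_python_api/api.rst | 8 +-
 src/bindings/python/setup.py | 1 +
 .../src/compatibility/ngraph/__init__.py | 348 +++++++++---------
 .../compatibility/ngraph/opset11/__init__.py | 177 +++++++++
 .../src/compatibility/ngraph/opset11/ops.py | 77 ++++
 .../ngraph/utils/node_factory.py | 2 +-
 .../compatibility/pyngraph/node_factory.cpp | 2 +-
 .../python/src/openvino/runtime/__init__.py | 33 +-
 .../src/openvino/runtime/opset11/__init__.py | 178 +++++++++
 .../src/openvino/runtime/opset11/ops.py | 77 ++++
 .../openvino/runtime/utils/node_factory.py | 2 +-
 .../src/pyopenvino/graph/node_factory.cpp | 2 +-
 .../python/tests/test_graph/test_create_op.py | 28 +-
 .../test_ngraph/test_create_op.py | 25 +-
 14 files changed, 760 insertions(+), 200 deletions(-)
 create mode 100644 src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
 create mode 100644 src/bindings/python/src/compatibility/ngraph/opset11/ops.py
 create mode 100644 src/bindings/python/src/openvino/runtime/opset11/__init__.py
 create mode 100644 src/bindings/python/src/openvino/runtime/opset11/ops.py

diff --git a/docs/api/ie_python_api/api.rst b/docs/api/ie_python_api/api.rst
index c06cc2b2fb8c80..5faa85f4e05b8e 100644
--- a/docs/api/ie_python_api/api.rst
+++ b/docs/api/ie_python_api/api.rst
@@ -73,6 +73,12 @@ OpenVINO Python API

    openvino.runtime.opset10

+.. autosummary::
+   :toctree: _autosummary
+   :template: custom-module-template.rst
+
+   openvino.runtime.opset11
+
 ..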
autosummary:: :toctree: _autosummary :template: custom-module-template.rst @@ -95,4 +101,4 @@ OpenVINO Python API :maxdepth: 2 :hidden: - compatibility \ No newline at end of file + compatibility diff --git a/src/bindings/python/setup.py b/src/bindings/python/setup.py index 021e37875e32da..6205f70aeb4e04 100644 --- a/src/bindings/python/setup.py +++ b/src/bindings/python/setup.py @@ -55,6 +55,7 @@ "openvino.runtime.opset8", "openvino.runtime.opset9", "openvino.runtime.opset10", + "openvino.runtime.opset11", "openvino.runtime.utils", "openvino.runtime.op", "openvino.runtime.op.util", diff --git a/src/bindings/python/src/compatibility/ngraph/__init__.py b/src/bindings/python/src/compatibility/ngraph/__init__.py index 3109d9e3d1d525..d80f2199dfc5d0 100644 --- a/src/bindings/python/src/compatibility/ngraph/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/__init__.py @@ -18,180 +18,180 @@ from ngraph.impl import PartialShape from ngraph.helpers import function_from_cnn from ngraph.helpers import function_to_cnn -from ngraph.opset10 import absolute -from ngraph.opset10 import absolute as abs -from ngraph.opset10 import acos -from ngraph.opset10 import acosh -from ngraph.opset10 import adaptive_avg_pool -from ngraph.opset10 import adaptive_max_pool -from ngraph.opset10 import add -from ngraph.opset10 import asin -from ngraph.opset10 import asinh -from ngraph.opset10 import assign -from ngraph.opset10 import atan -from ngraph.opset10 import atanh -from ngraph.opset10 import avg_pool -from ngraph.opset10 import batch_norm_inference -from ngraph.opset10 import batch_to_space -from ngraph.opset10 import binary_convolution -from ngraph.opset10 import broadcast -from ngraph.opset10 import bucketize -from ngraph.opset10 import ceiling -from ngraph.opset10 import ceiling as ceil -from ngraph.opset10 import clamp -from ngraph.opset10 import concat -from ngraph.opset10 import constant -from ngraph.opset10 import convert -from ngraph.opset10 import convert_like -from ngraph.opset10 import convolution -from ngraph.opset10 import convolution_backprop_data -from ngraph.opset10 import cos -from ngraph.opset10 import cosh -from ngraph.opset10 import ctc_greedy_decoder -from ngraph.opset10 import ctc_greedy_decoder_seq_len -from ngraph.opset10 import ctc_loss -from ngraph.opset10 import cum_sum -from ngraph.opset10 import cum_sum as cumsum -from ngraph.opset10 import deformable_convolution -from ngraph.opset10 import deformable_psroi_pooling -from ngraph.opset10 import depth_to_space -from ngraph.opset10 import detection_output -from ngraph.opset10 import dft -from ngraph.opset10 import divide -from ngraph.opset10 import einsum -from ngraph.opset10 import elu -from ngraph.opset10 import embedding_bag_offsets_sum -from ngraph.opset10 import embedding_bag_packed_sum -from ngraph.opset10 import embedding_segments_sum -from ngraph.opset10 import extract_image_patches -from ngraph.opset10 import equal -from ngraph.opset10 import erf -from ngraph.opset10 import exp -from ngraph.opset10 import eye -from ngraph.opset10 import fake_quantize -from ngraph.opset10 import floor -from ngraph.opset10 import floor_mod -from ngraph.opset10 import gather -from ngraph.opset10 import gather_elements -from ngraph.opset10 import gather_nd -from ngraph.opset10 import gather_tree -from ngraph.opset10 import gelu -from ngraph.opset10 import generate_proposals -from ngraph.opset10 import greater -from ngraph.opset10 import greater_equal -from ngraph.opset10 import grid_sample -from ngraph.opset10 import grn -from 
ngraph.opset10 import group_convolution -from ngraph.opset10 import group_convolution_backprop_data -from ngraph.opset10 import gru_cell -from ngraph.opset10 import gru_sequence -from ngraph.opset10 import hard_sigmoid -from ngraph.opset10 import hsigmoid -from ngraph.opset10 import hswish -from ngraph.opset10 import idft -from ngraph.opset10 import if_op -from ngraph.opset10 import interpolate -from ngraph.opset10 import irdft -from ngraph.opset10 import is_finite -from ngraph.opset10 import is_inf -from ngraph.opset10 import is_nan -from ngraph.opset10 import i420_to_bgr -from ngraph.opset10 import i420_to_rgb -from ngraph.opset10 import less -from ngraph.opset10 import less_equal -from ngraph.opset10 import log -from ngraph.opset10 import logical_and -from ngraph.opset10 import logical_not -from ngraph.opset10 import logical_or -from ngraph.opset10 import logical_xor -from ngraph.opset10 import log_softmax -from ngraph.opset10 import loop -from ngraph.opset10 import lrn -from ngraph.opset10 import lstm_cell -from ngraph.opset10 import lstm_sequence -from ngraph.opset10 import matmul -from ngraph.opset10 import matrix_nms -from ngraph.opset10 import max_pool -from ngraph.opset10 import maximum -from ngraph.opset10 import minimum -from ngraph.opset10 import mish -from ngraph.opset10 import mod -from ngraph.opset10 import multiclass_nms -from ngraph.opset10 import multiply -from ngraph.opset10 import mvn -from ngraph.opset10 import negative -from ngraph.opset10 import non_max_suppression -from ngraph.opset10 import non_zero -from ngraph.opset10 import normalize_l2 -from ngraph.opset10 import not_equal -from ngraph.opset10 import nv12_to_bgr -from ngraph.opset10 import nv12_to_rgb -from ngraph.opset10 import one_hot -from ngraph.opset10 import pad -from ngraph.opset10 import parameter -from ngraph.opset10 import power -from ngraph.opset10 import prelu -from ngraph.opset10 import prior_box -from ngraph.opset10 import prior_box_clustered -from ngraph.opset10 import psroi_pooling -from ngraph.opset10 import proposal -from ngraph.opset10 import random_uniform -from ngraph.opset10 import range -from ngraph.opset10 import rdft -from ngraph.opset10 import read_value -from ngraph.opset10 import reduce_l1 -from ngraph.opset10 import reduce_l2 -from ngraph.opset10 import reduce_logical_and -from ngraph.opset10 import reduce_logical_or -from ngraph.opset10 import reduce_max -from ngraph.opset10 import reduce_mean -from ngraph.opset10 import reduce_min -from ngraph.opset10 import reduce_prod -from ngraph.opset10 import reduce_sum -from ngraph.opset10 import region_yolo -from ngraph.opset10 import reorg_yolo -from ngraph.opset10 import relu -from ngraph.opset10 import reshape -from ngraph.opset10 import result -from ngraph.opset10 import reverse_sequence -from ngraph.opset10 import rnn_cell -from ngraph.opset10 import rnn_sequence -from ngraph.opset10 import roi_align -from ngraph.opset10 import roi_pooling -from ngraph.opset10 import roll -from ngraph.opset10 import round -from ngraph.opset10 import scatter_elements_update -from ngraph.opset10 import scatter_update -from ngraph.opset10 import select -from ngraph.opset10 import selu -from ngraph.opset10 import shape_of -from ngraph.opset10 import shuffle_channels -from ngraph.opset10 import sigmoid -from ngraph.opset10 import sign -from ngraph.opset10 import sin -from ngraph.opset10 import sinh -from ngraph.opset10 import slice -from ngraph.opset10 import softmax -from ngraph.opset10 import softplus -from ngraph.opset10 import softsign -from 
ngraph.opset10 import space_to_batch -from ngraph.opset10 import space_to_depth -from ngraph.opset10 import split -from ngraph.opset10 import sqrt -from ngraph.opset10 import squared_difference -from ngraph.opset10 import squeeze -from ngraph.opset10 import strided_slice -from ngraph.opset10 import subtract -from ngraph.opset10 import swish -from ngraph.opset10 import tan -from ngraph.opset10 import tanh -from ngraph.opset10 import tensor_iterator -from ngraph.opset10 import tile -from ngraph.opset10 import topk -from ngraph.opset10 import transpose -from ngraph.opset10 import unique -from ngraph.opset10 import unsqueeze -from ngraph.opset10 import variadic_split +from ngraph.opset11 import absolute +from ngraph.opset11 import absolute as abs +from ngraph.opset11 import acos +from ngraph.opset11 import acosh +from ngraph.opset11 import adaptive_avg_pool +from ngraph.opset11 import adaptive_max_pool +from ngraph.opset11 import add +from ngraph.opset11 import asin +from ngraph.opset11 import asinh +from ngraph.opset11 import assign +from ngraph.opset11 import atan +from ngraph.opset11 import atanh +from ngraph.opset11 import avg_pool +from ngraph.opset11 import batch_norm_inference +from ngraph.opset11 import batch_to_space +from ngraph.opset11 import binary_convolution +from ngraph.opset11 import broadcast +from ngraph.opset11 import bucketize +from ngraph.opset11 import ceiling +from ngraph.opset11 import ceiling as ceil +from ngraph.opset11 import clamp +from ngraph.opset11 import concat +from ngraph.opset11 import constant +from ngraph.opset11 import convert +from ngraph.opset11 import convert_like +from ngraph.opset11 import convolution +from ngraph.opset11 import convolution_backprop_data +from ngraph.opset11 import cos +from ngraph.opset11 import cosh +from ngraph.opset11 import ctc_greedy_decoder +from ngraph.opset11 import ctc_greedy_decoder_seq_len +from ngraph.opset11 import ctc_loss +from ngraph.opset11 import cum_sum +from ngraph.opset11 import cum_sum as cumsum +from ngraph.opset11 import deformable_convolution +from ngraph.opset11 import deformable_psroi_pooling +from ngraph.opset11 import depth_to_space +from ngraph.opset11 import detection_output +from ngraph.opset11 import dft +from ngraph.opset11 import divide +from ngraph.opset11 import einsum +from ngraph.opset11 import elu +from ngraph.opset11 import embedding_bag_offsets_sum +from ngraph.opset11 import embedding_bag_packed_sum +from ngraph.opset11 import embedding_segments_sum +from ngraph.opset11 import extract_image_patches +from ngraph.opset11 import equal +from ngraph.opset11 import erf +from ngraph.opset11 import exp +from ngraph.opset11 import eye +from ngraph.opset11 import fake_quantize +from ngraph.opset11 import floor +from ngraph.opset11 import floor_mod +from ngraph.opset11 import gather +from ngraph.opset11 import gather_elements +from ngraph.opset11 import gather_nd +from ngraph.opset11 import gather_tree +from ngraph.opset11 import gelu +from ngraph.opset11 import generate_proposals +from ngraph.opset11 import greater +from ngraph.opset11 import greater_equal +from ngraph.opset11 import grid_sample +from ngraph.opset11 import grn +from ngraph.opset11 import group_convolution +from ngraph.opset11 import group_convolution_backprop_data +from ngraph.opset11 import gru_cell +from ngraph.opset11 import gru_sequence +from ngraph.opset11 import hard_sigmoid +from ngraph.opset11 import hsigmoid +from ngraph.opset11 import hswish +from ngraph.opset11 import idft +from ngraph.opset11 import if_op +from 
ngraph.opset11 import interpolate +from ngraph.opset11 import irdft +from ngraph.opset11 import is_finite +from ngraph.opset11 import is_inf +from ngraph.opset11 import is_nan +from ngraph.opset11 import i420_to_bgr +from ngraph.opset11 import i420_to_rgb +from ngraph.opset11 import less +from ngraph.opset11 import less_equal +from ngraph.opset11 import log +from ngraph.opset11 import logical_and +from ngraph.opset11 import logical_not +from ngraph.opset11 import logical_or +from ngraph.opset11 import logical_xor +from ngraph.opset11 import log_softmax +from ngraph.opset11 import loop +from ngraph.opset11 import lrn +from ngraph.opset11 import lstm_cell +from ngraph.opset11 import lstm_sequence +from ngraph.opset11 import matmul +from ngraph.opset11 import matrix_nms +from ngraph.opset11 import max_pool +from ngraph.opset11 import maximum +from ngraph.opset11 import minimum +from ngraph.opset11 import mish +from ngraph.opset11 import mod +from ngraph.opset11 import multiclass_nms +from ngraph.opset11 import multiply +from ngraph.opset11 import mvn +from ngraph.opset11 import negative +from ngraph.opset11 import non_max_suppression +from ngraph.opset11 import non_zero +from ngraph.opset11 import normalize_l2 +from ngraph.opset11 import not_equal +from ngraph.opset11 import nv12_to_bgr +from ngraph.opset11 import nv12_to_rgb +from ngraph.opset11 import one_hot +from ngraph.opset11 import pad +from ngraph.opset11 import parameter +from ngraph.opset11 import power +from ngraph.opset11 import prelu +from ngraph.opset11 import prior_box +from ngraph.opset11 import prior_box_clustered +from ngraph.opset11 import psroi_pooling +from ngraph.opset11 import proposal +from ngraph.opset11 import random_uniform +from ngraph.opset11 import range +from ngraph.opset11 import rdft +from ngraph.opset11 import read_value +from ngraph.opset11 import reduce_l1 +from ngraph.opset11 import reduce_l2 +from ngraph.opset11 import reduce_logical_and +from ngraph.opset11 import reduce_logical_or +from ngraph.opset11 import reduce_max +from ngraph.opset11 import reduce_mean +from ngraph.opset11 import reduce_min +from ngraph.opset11 import reduce_prod +from ngraph.opset11 import reduce_sum +from ngraph.opset11 import region_yolo +from ngraph.opset11 import reorg_yolo +from ngraph.opset11 import relu +from ngraph.opset11 import reshape +from ngraph.opset11 import result +from ngraph.opset11 import reverse_sequence +from ngraph.opset11 import rnn_cell +from ngraph.opset11 import rnn_sequence +from ngraph.opset11 import roi_align +from ngraph.opset11 import roi_pooling +from ngraph.opset11 import roll +from ngraph.opset11 import round +from ngraph.opset11 import scatter_elements_update +from ngraph.opset11 import scatter_update +from ngraph.opset11 import select +from ngraph.opset11 import selu +from ngraph.opset11 import shape_of +from ngraph.opset11 import shuffle_channels +from ngraph.opset11 import sigmoid +from ngraph.opset11 import sign +from ngraph.opset11 import sin +from ngraph.opset11 import sinh +from ngraph.opset11 import slice +from ngraph.opset11 import softmax +from ngraph.opset11 import softplus +from ngraph.opset11 import softsign +from ngraph.opset11 import space_to_batch +from ngraph.opset11 import space_to_depth +from ngraph.opset11 import split +from ngraph.opset11 import sqrt +from ngraph.opset11 import squared_difference +from ngraph.opset11 import squeeze +from ngraph.opset11 import strided_slice +from ngraph.opset11 import subtract +from ngraph.opset11 import swish +from ngraph.opset11 import tan 
+from ngraph.opset11 import tanh +from ngraph.opset11 import tensor_iterator +from ngraph.opset11 import tile +from ngraph.opset11 import topk +from ngraph.opset11 import transpose +from ngraph.opset11 import unique +from ngraph.opset11 import unsqueeze +from ngraph.opset11 import variadic_split # Extend Node class to support binary operators diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py new file mode 100644 index 00000000000000..91f84b81f415cd --- /dev/null +++ b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py @@ -0,0 +1,177 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from ngraph.opset1.ops import absolute +from ngraph.opset1.ops import absolute as abs +from ngraph.opset1.ops import acos +from ngraph.opset4.ops import acosh +from ngraph.opset8.ops import adaptive_avg_pool +from ngraph.opset8.ops import adaptive_max_pool +from ngraph.opset1.ops import add +from ngraph.opset1.ops import asin +from ngraph.opset4.ops import asinh +from ngraph.opset3.ops import assign +from ngraph.opset1.ops import atan +from ngraph.opset4.ops import atanh +from ngraph.opset1.ops import avg_pool +from ngraph.opset5.ops import batch_norm_inference +from ngraph.opset2.ops import batch_to_space +from ngraph.opset1.ops import binary_convolution +from ngraph.opset3.ops import broadcast +from ngraph.opset3.ops import bucketize +from ngraph.opset1.ops import ceiling +from ngraph.opset1.ops import ceiling as ceil +from ngraph.opset1.ops import clamp +from ngraph.opset1.ops import concat +from ngraph.opset1.ops import constant +from ngraph.opset1.ops import convert +from ngraph.opset1.ops import convert_like +from ngraph.opset1.ops import convolution +from ngraph.opset1.ops import convolution_backprop_data +from ngraph.opset1.ops import cos +from ngraph.opset1.ops import cosh +from ngraph.opset1.ops import ctc_greedy_decoder +from ngraph.opset6.ops import ctc_greedy_decoder_seq_len +from ngraph.opset4.ops import ctc_loss +from ngraph.opset3.ops import cum_sum +from ngraph.opset3.ops import cum_sum as cumsum +from ngraph.opset8.ops import deformable_convolution +from ngraph.opset1.ops import deformable_psroi_pooling +from ngraph.opset1.ops import depth_to_space +from ngraph.opset8.ops import detection_output +from ngraph.opset7.ops import dft +from ngraph.opset1.ops import divide +from ngraph.opset7.ops import einsum +from ngraph.opset1.ops import elu +from ngraph.opset3.ops import embedding_bag_offsets_sum +from ngraph.opset3.ops import embedding_bag_packed_sum +from ngraph.opset3.ops import embedding_segments_sum +from ngraph.opset3.ops import extract_image_patches +from ngraph.opset1.ops import equal +from ngraph.opset1.ops import erf +from ngraph.opset1.ops import exp +from ngraph.opset9.ops import eye +from ngraph.opset1.ops import fake_quantize +from ngraph.opset1.ops import floor +from ngraph.opset1.ops import floor_mod +from ngraph.opset8.ops import gather +from ngraph.opset6.ops import gather_elements +from ngraph.opset8.ops import gather_nd +from ngraph.opset1.ops import gather_tree +from ngraph.opset7.ops import gelu +from ngraph.opset9.ops import generate_proposals +from ngraph.opset1.ops import greater +from ngraph.opset1.ops import greater_equal +from ngraph.opset9.ops import grid_sample +from ngraph.opset1.ops import grn +from ngraph.opset1.ops import group_convolution +from ngraph.opset1.ops import group_convolution_backprop_data +from 
ngraph.opset3.ops import gru_cell +from ngraph.opset5.ops import gru_sequence +from ngraph.opset1.ops import hard_sigmoid +from ngraph.opset5.ops import hsigmoid +from ngraph.opset4.ops import hswish +from ngraph.opset7.ops import idft +from ngraph.opset8.ops import if_op +from ngraph.opset11.ops import interpolate +from ngraph.opset9.ops import irdft +from ngraph.opset10.ops import is_finite +from ngraph.opset10.ops import is_inf +from ngraph.opset10.ops import is_nan +from ngraph.opset8.ops import i420_to_bgr +from ngraph.opset8.ops import i420_to_rgb +from ngraph.opset1.ops import less +from ngraph.opset1.ops import less_equal +from ngraph.opset1.ops import log +from ngraph.opset1.ops import logical_and +from ngraph.opset1.ops import logical_not +from ngraph.opset1.ops import logical_or +from ngraph.opset1.ops import logical_xor +from ngraph.opset5.ops import log_softmax +from ngraph.opset5.ops import loop +from ngraph.opset1.ops import lrn +from ngraph.opset4.ops import lstm_cell +from ngraph.opset5.ops import lstm_sequence +from ngraph.opset1.ops import matmul +from ngraph.opset8.ops import matrix_nms +from ngraph.opset8.ops import max_pool +from ngraph.opset1.ops import maximum +from ngraph.opset1.ops import minimum +from ngraph.opset4.ops import mish +from ngraph.opset1.ops import mod +from ngraph.opset9.ops import multiclass_nms +from ngraph.opset1.ops import multiply +from ngraph.opset6.ops import mvn +from ngraph.opset1.ops import negative +from ngraph.opset9.ops import non_max_suppression +from ngraph.opset3.ops import non_zero +from ngraph.opset1.ops import normalize_l2 +from ngraph.opset1.ops import not_equal +from ngraph.opset8.ops import nv12_to_bgr +from ngraph.opset8.ops import nv12_to_rgb +from ngraph.opset1.ops import one_hot +from ngraph.opset1.ops import pad +from ngraph.opset1.ops import parameter +from ngraph.opset1.ops import power +from ngraph.opset1.ops import prelu +from ngraph.opset8.ops import prior_box +from ngraph.opset1.ops import prior_box_clustered +from ngraph.opset1.ops import psroi_pooling +from ngraph.opset4.ops import proposal +from ngraph.opset8.ops import random_uniform +from ngraph.opset1.ops import range +from ngraph.opset9.ops import rdft +from ngraph.opset3.ops import read_value +from ngraph.opset4.ops import reduce_l1 +from ngraph.opset4.ops import reduce_l2 +from ngraph.opset1.ops import reduce_logical_and +from ngraph.opset1.ops import reduce_logical_or +from ngraph.opset1.ops import reduce_max +from ngraph.opset1.ops import reduce_mean +from ngraph.opset1.ops import reduce_min +from ngraph.opset1.ops import reduce_prod +from ngraph.opset1.ops import reduce_sum +from ngraph.opset1.ops import region_yolo +from ngraph.opset2.ops import reorg_yolo +from ngraph.opset1.ops import relu +from ngraph.opset1.ops import reshape +from ngraph.opset1.ops import result +from ngraph.opset1.ops import reverse_sequence +from ngraph.opset3.ops import rnn_cell +from ngraph.opset5.ops import rnn_sequence +from ngraph.opset9.ops import roi_align +from ngraph.opset2.ops import roi_pooling +from ngraph.opset7.ops import roll +from ngraph.opset5.ops import round +from ngraph.opset3.ops import scatter_elements_update +from ngraph.opset3.ops import scatter_update +from ngraph.opset1.ops import select +from ngraph.opset1.ops import selu +from ngraph.opset3.ops import shape_of +from ngraph.opset3.ops import shuffle_channels +from ngraph.opset1.ops import sigmoid +from ngraph.opset1.ops import sign +from ngraph.opset1.ops import sin +from ngraph.opset1.ops import sinh 
+from ngraph.opset8.ops import slice
+from ngraph.opset8.ops import softmax
+from ngraph.opset4.ops import softplus
+from ngraph.opset9.ops import softsign
+from ngraph.opset2.ops import space_to_batch
+from ngraph.opset1.ops import space_to_depth
+from ngraph.opset1.ops import split
+from ngraph.opset1.ops import sqrt
+from ngraph.opset1.ops import squared_difference
+from ngraph.opset1.ops import squeeze
+from ngraph.opset1.ops import strided_slice
+from ngraph.opset1.ops import subtract
+from ngraph.opset4.ops import swish
+from ngraph.opset1.ops import tan
+from ngraph.opset1.ops import tanh
+from ngraph.opset1.ops import tensor_iterator
+from ngraph.opset1.ops import tile
+from ngraph.opset3.ops import topk
+from ngraph.opset1.ops import transpose
+from ngraph.opset10.ops import unique
+from ngraph.opset1.ops import unsqueeze
+from ngraph.opset1.ops import variadic_split
diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/ops.py b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py
new file mode 100644
index 00000000000000..434b778b246cf8
--- /dev/null
+++ b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Factory functions for all openvino ops."""
+from functools import partial
+from typing import List, Optional
+
+from ngraph.impl import Node
+from ngraph.opset_utils import _get_node_factory
+from ngraph.utils.decorators import nameable_op
+from ngraph.utils.types import (
+    NodeInput,
+    as_nodes,
+)
+
+_get_node_factory_opset11 = partial(_get_node_factory, "opset11")
+
+# -------------------------------------------- ops ------------------------------------------------
+
+
+@nameable_op
+def interpolate(
+    image: NodeInput,
+    scales_or_sizes: NodeInput,
+    mode: str,
+    shape_calculation_mode: str,
+    pads_begin: Optional[List[int]] = None,
+    pads_end: Optional[List[int]] = None,
+    coordinate_transformation_mode: str = "half_pixel",
+    nearest_mode: str = "round_prefer_floor",
+    antialias: bool = False,
+    cube_coeff: float = -0.75,
+    axes: Optional[NodeInput] = None,
+    name: Optional[str] = None,
+) -> Node:
+    """Performs the interpolation of the input tensor.
+
+    :param image: The node providing input tensor with data for interpolation.
+    :param scales_or_sizes:
+        1D tensor providing information used to calculate the output shape
+        of the operation. It might contain floats (scales) or integers (sizes).
+    :param mode: Specifies the type of interpolation. Possible values are: nearest, linear,
+                 linear_onnx, cubic, bilinear_pillow, bicubic_pillow.
+    :param shape_calculation_mode:
+        Specifies how the scales_or_sizes input should be interpreted.
+    :param pads_begin: Specifies the number of pixels to add to the beginning of the image
+                       being interpolated. Default is None.
+    :param pads_end: Specifies the number of pixels to add to the end of the image being
+                     interpolated. Default is None.
+    :param coordinate_transformation_mode:
+        Specifies how to transform the coordinate in the resized tensor to the
+        coordinate in the original tensor. Default is "half_pixel".
+    :param nearest_mode: Specifies the rounding mode; it is used only when
+                         mode == nearest. Default is "round_prefer_floor".
+    :param antialias: Specifies whether to perform anti-aliasing. Default is False.
+    :param cube_coeff: Specifies the parameter a for cubic interpolation. Default is -0.75.
+ :param axes: 1D tensor specifying dimension indices where interpolation is applied. + The default is None. + :param name: Optional name for the output node. The default is None. + :return: Node representing the interpolation operation. + """ + attrs = { + "mode": mode, + "shape_calculation_mode": shape_calculation_mode, + "coordinate_transformation_mode": coordinate_transformation_mode, + "nearest_mode": nearest_mode, + "antialias": antialias, + "cube_coeff": cube_coeff, + } + + attrs["pads_begin"] = [] if pads_begin is None else pads_begin + attrs["pads_end"] = [] if pads_end is None else pads_end + + inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) + + return _get_node_factory_opset11().create("Interpolate", inputs, attrs) diff --git a/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py b/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py index 6aa951a65b66b7..0e3d2cc09cecc2 100644 --- a/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py +++ b/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py @@ -12,7 +12,7 @@ from ngraph.exceptions import UserInputError -DEFAULT_OPSET = "opset10" +DEFAULT_OPSET = "opset11" class NodeFactory(object): diff --git a/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp b/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp index 281525cca95832..2108a7a057bb3c 100644 --- a/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp @@ -82,7 +82,7 @@ class NodeFactory { return it->second(); } - const ngraph::OpSet& m_opset = ngraph::get_opset10(); + const ngraph::OpSet& m_opset = ngraph::get_opset11(); std::unordered_map> m_variables; }; } // namespace diff --git a/src/bindings/python/src/openvino/runtime/__init__.py b/src/bindings/python/src/openvino/runtime/__init__.py index 3c2937c214ed70..9241819e87135c 100644 --- a/src/bindings/python/src/openvino/runtime/__init__.py +++ b/src/bindings/python/src/openvino/runtime/__init__.py @@ -56,6 +56,7 @@ from openvino.runtime import opset8 from openvino.runtime import opset9 from openvino.runtime import opset10 +from openvino.runtime import opset11 # Import properties API from openvino._pyopenvino import properties @@ -66,19 +67,19 @@ # Extend Node class to support binary operators -Node.__add__ = opset10.add -Node.__sub__ = opset10.subtract -Node.__mul__ = opset10.multiply -Node.__div__ = opset10.divide -Node.__truediv__ = opset10.divide -Node.__radd__ = lambda left, right: opset10.add(right, left) -Node.__rsub__ = lambda left, right: opset10.subtract(right, left) -Node.__rmul__ = lambda left, right: opset10.multiply(right, left) -Node.__rdiv__ = lambda left, right: opset10.divide(right, left) -Node.__rtruediv__ = lambda left, right: opset10.divide(right, left) -Node.__eq__ = opset10.equal -Node.__ne__ = opset10.not_equal -Node.__lt__ = opset10.less -Node.__le__ = opset10.less_equal -Node.__gt__ = opset10.greater -Node.__ge__ = opset10.greater_equal +Node.__add__ = opset11.add +Node.__sub__ = opset11.subtract +Node.__mul__ = opset11.multiply +Node.__div__ = opset11.divide +Node.__truediv__ = opset11.divide +Node.__radd__ = lambda left, right: opset11.add(right, left) +Node.__rsub__ = lambda left, right: opset11.subtract(right, left) +Node.__rmul__ = lambda left, right: opset11.multiply(right, left) +Node.__rdiv__ = lambda left, right: opset11.divide(right, left) +Node.__rtruediv__ = lambda left, right: 
opset11.divide(right, left) +Node.__eq__ = opset11.equal +Node.__ne__ = opset11.not_equal +Node.__lt__ = opset11.less +Node.__le__ = opset11.less_equal +Node.__gt__ = opset11.greater +Node.__ge__ = opset11.greater_equal diff --git a/src/bindings/python/src/openvino/runtime/opset11/__init__.py b/src/bindings/python/src/openvino/runtime/opset11/__init__.py new file mode 100644 index 00000000000000..79c7068bf83d87 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset11/__init__.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.runtime.opset1.ops import absolute +from openvino.runtime.opset1.ops import absolute as abs +from openvino.runtime.opset1.ops import acos +from openvino.runtime.opset4.ops import acosh +from openvino.runtime.opset8.ops import adaptive_avg_pool +from openvino.runtime.opset8.ops import adaptive_max_pool +from openvino.runtime.opset1.ops import add +from openvino.runtime.opset1.ops import asin +from openvino.runtime.opset4.ops import asinh +from openvino.runtime.opset3.ops import assign +from openvino.runtime.opset1.ops import atan +from openvino.runtime.opset4.ops import atanh +from openvino.runtime.opset1.ops import avg_pool +from openvino.runtime.opset5.ops import batch_norm_inference +from openvino.runtime.opset2.ops import batch_to_space +from openvino.runtime.opset1.ops import binary_convolution +from openvino.runtime.opset3.ops import broadcast +from openvino.runtime.opset3.ops import bucketize +from openvino.runtime.opset1.ops import ceiling +from openvino.runtime.opset1.ops import ceiling as ceil +from openvino.runtime.opset1.ops import clamp +from openvino.runtime.opset1.ops import concat +from openvino.runtime.opset1.ops import constant +from openvino.runtime.opset1.ops import convert +from openvino.runtime.opset1.ops import convert_like +from openvino.runtime.opset1.ops import convolution +from openvino.runtime.opset1.ops import convolution_backprop_data +from openvino.runtime.opset1.ops import cos +from openvino.runtime.opset1.ops import cosh +from openvino.runtime.opset1.ops import ctc_greedy_decoder +from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.runtime.opset4.ops import ctc_loss +from openvino.runtime.opset3.ops import cum_sum +from openvino.runtime.opset3.ops import cum_sum as cumsum +from openvino.runtime.opset8.ops import deformable_convolution +from openvino.runtime.opset1.ops import deformable_psroi_pooling +from openvino.runtime.opset1.ops import depth_to_space +from openvino.runtime.opset8.ops import detection_output +from openvino.runtime.opset7.ops import dft +from openvino.runtime.opset1.ops import divide +from openvino.runtime.opset7.ops import einsum +from openvino.runtime.opset1.ops import elu +from openvino.runtime.opset3.ops import embedding_bag_offsets_sum +from openvino.runtime.opset3.ops import embedding_bag_packed_sum +from openvino.runtime.opset3.ops import embedding_segments_sum +from openvino.runtime.opset3.ops import extract_image_patches +from openvino.runtime.opset1.ops import equal +from openvino.runtime.opset1.ops import erf +from openvino.runtime.opset1.ops import exp +from openvino.runtime.opset9.ops import eye +from openvino.runtime.opset1.ops import fake_quantize +from openvino.runtime.opset1.ops import floor +from openvino.runtime.opset1.ops import floor_mod +from openvino.runtime.opset8.ops import gather +from openvino.runtime.opset6.ops import gather_elements +from 
openvino.runtime.opset8.ops import gather_nd +from openvino.runtime.opset1.ops import gather_tree +from openvino.runtime.opset7.ops import gelu +from openvino.runtime.opset9.ops import generate_proposals +from openvino.runtime.opset1.ops import greater +from openvino.runtime.opset1.ops import greater_equal +from openvino.runtime.opset9.ops import grid_sample +from openvino.runtime.opset1.ops import grn +from openvino.runtime.opset1.ops import group_convolution +from openvino.runtime.opset1.ops import group_convolution_backprop_data +from openvino.runtime.opset3.ops import gru_cell +from openvino.runtime.opset5.ops import gru_sequence +from openvino.runtime.opset1.ops import hard_sigmoid +from openvino.runtime.opset5.ops import hsigmoid +from openvino.runtime.opset4.ops import hswish +from openvino.runtime.opset7.ops import idft +from openvino.runtime.opset8.ops import if_op +from openvino.runtime.opset11.ops import interpolate +from openvino.runtime.opset9.ops import irdft +from openvino.runtime.opset10.ops import is_finite +from openvino.runtime.opset10.ops import is_inf +from openvino.runtime.opset10.ops import is_nan +from openvino.runtime.opset8.ops import i420_to_bgr +from openvino.runtime.opset8.ops import i420_to_rgb +from openvino.runtime.opset1.ops import less +from openvino.runtime.opset1.ops import less_equal +from openvino.runtime.opset1.ops import log +from openvino.runtime.opset1.ops import logical_and +from openvino.runtime.opset1.ops import logical_not +from openvino.runtime.opset1.ops import logical_or +from openvino.runtime.opset1.ops import logical_xor +from openvino.runtime.opset5.ops import log_softmax +from openvino.runtime.opset5.ops import loop +from openvino.runtime.opset1.ops import lrn +from openvino.runtime.opset4.ops import lstm_cell +from openvino.runtime.opset5.ops import lstm_sequence +from openvino.runtime.opset1.ops import matmul +from openvino.runtime.opset8.ops import matrix_nms +from openvino.runtime.opset8.ops import max_pool +from openvino.runtime.opset1.ops import maximum +from openvino.runtime.opset1.ops import minimum +from openvino.runtime.opset4.ops import mish +from openvino.runtime.opset1.ops import mod +from openvino.runtime.opset9.ops import multiclass_nms +from openvino.runtime.opset1.ops import multiply +from openvino.runtime.opset6.ops import mvn +from openvino.runtime.opset1.ops import negative +from openvino.runtime.opset9.ops import non_max_suppression +from openvino.runtime.opset3.ops import non_zero +from openvino.runtime.opset1.ops import normalize_l2 +from openvino.runtime.opset1.ops import not_equal +from openvino.runtime.opset8.ops import nv12_to_bgr +from openvino.runtime.opset8.ops import nv12_to_rgb +from openvino.runtime.opset1.ops import one_hot +from openvino.runtime.opset1.ops import pad +from openvino.runtime.opset1.ops import parameter +from openvino.runtime.opset1.ops import power +from openvino.runtime.opset1.ops import prelu +from openvino.runtime.opset8.ops import prior_box +from openvino.runtime.opset1.ops import prior_box_clustered +from openvino.runtime.opset1.ops import psroi_pooling +from openvino.runtime.opset4.ops import proposal +from openvino.runtime.opset1.ops import range +from openvino.runtime.opset8.ops import random_uniform +from openvino.runtime.opset9.ops import rdft +from openvino.runtime.opset3.ops import read_value +from openvino.runtime.opset4.ops import reduce_l1 +from openvino.runtime.opset4.ops import reduce_l2 +from openvino.runtime.opset1.ops import reduce_logical_and +from 
openvino.runtime.opset1.ops import reduce_logical_or +from openvino.runtime.opset1.ops import reduce_max +from openvino.runtime.opset1.ops import reduce_mean +from openvino.runtime.opset1.ops import reduce_min +from openvino.runtime.opset1.ops import reduce_prod +from openvino.runtime.opset1.ops import reduce_sum +from openvino.runtime.opset1.ops import region_yolo +from openvino.runtime.opset2.ops import reorg_yolo +from openvino.runtime.opset1.ops import relu +from openvino.runtime.opset1.ops import reshape +from openvino.runtime.opset1.ops import result +from openvino.runtime.opset1.ops import reverse_sequence +from openvino.runtime.opset3.ops import rnn_cell +from openvino.runtime.opset5.ops import rnn_sequence +from openvino.runtime.opset9.ops import roi_align +from openvino.runtime.opset2.ops import roi_pooling +from openvino.runtime.opset7.ops import roll +from openvino.runtime.opset5.ops import round +from openvino.runtime.opset3.ops import scatter_elements_update +from openvino.runtime.opset3.ops import scatter_update +from openvino.runtime.opset1.ops import select +from openvino.runtime.opset1.ops import selu +from openvino.runtime.opset3.ops import shape_of +from openvino.runtime.opset3.ops import shuffle_channels +from openvino.runtime.opset1.ops import sigmoid +from openvino.runtime.opset1.ops import sign +from openvino.runtime.opset1.ops import sin +from openvino.runtime.opset1.ops import sinh +from openvino.runtime.opset8.ops import slice +from openvino.runtime.opset8.ops import softmax +from openvino.runtime.opset4.ops import softplus +from openvino.runtime.opset9.ops import softsign +from openvino.runtime.opset2.ops import space_to_batch +from openvino.runtime.opset1.ops import space_to_depth +from openvino.runtime.opset1.ops import split +from openvino.runtime.opset1.ops import sqrt +from openvino.runtime.opset1.ops import squared_difference +from openvino.runtime.opset1.ops import squeeze +from openvino.runtime.opset1.ops import strided_slice +from openvino.runtime.opset1.ops import subtract +from openvino.runtime.opset4.ops import swish +from openvino.runtime.opset1.ops import tan +from openvino.runtime.opset1.ops import tanh +from openvino.runtime.opset1.ops import tensor_iterator +from openvino.runtime.opset1.ops import tile +from openvino.runtime.opset3.ops import topk +from openvino.runtime.opset1.ops import transpose +from openvino.runtime.opset10.ops import unique +from openvino.runtime.opset1.ops import unsqueeze +from openvino.runtime.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset11/ops.py b/src/bindings/python/src/openvino/runtime/opset11/ops.py new file mode 100644 index 00000000000000..2a54db0069ebd1 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset11/ops.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Factory functions for all openvino ops.""" +from functools import partial +from typing import List, Optional + +from openvino.runtime import Node +from openvino.runtime.opset_utils import _get_node_factory +from openvino.runtime.utils.decorators import nameable_op +from openvino.runtime.utils.types import ( + NodeInput, + as_nodes, +) + +_get_node_factory_opset11 = partial(_get_node_factory, "opset11") + +# -------------------------------------------- ops ------------------------------------------------ + + +@nameable_op +def interpolate( + image: NodeInput, + scales_or_sizes: NodeInput, + mode: str, + 
shape_calculation_mode: str, + pads_begin: Optional[List[int]] = None, + pads_end: Optional[List[int]] = None, + coordinate_transformation_mode: str = "half_pixel", + nearest_mode: str = "round_prefer_floor", + antialias: bool = False, + cube_coeff: float = -0.75, + axes: Optional[NodeInput] = None, + name: Optional[str] = None, +) -> Node: + """Performs the interpolation of the input tensor. + + :param image: The node providing the input tensor with data for interpolation. + :param scales_or_sizes: + 1D tensor providing information used to calculate the output shape + of the operation. It might contain floats (scales) or integers (sizes). + :param mode: Specifies the type of interpolation. Possible values are: nearest, linear, + linear_onnx, cubic, bilinear_pillow, bicubic_pillow. + :param shape_calculation_mode: + Specifies how the scales_or_sizes input should be interpreted. + :param pads_begin: Specifies the number of pixels to add to the beginning of the image + being interpolated. Default is None. + :param pads_end: Specifies the number of pixels to add to the end of the image being + interpolated. Default is None. + :param coordinate_transformation_mode: + Specifies how to transform the coordinate in the resized tensor to the + coordinate in the original tensor. Default is "half_pixel". + :param nearest_mode: Specifies the rounding mode; used only when + mode == nearest. Default is "round_prefer_floor". + :param antialias: Specifies whether to perform anti-aliasing. Default is False. + :param cube_coeff: Specifies the parameter a for cubic interpolation. Default is -0.75. + :param axes: 1D tensor specifying dimension indices where interpolation is applied. + The default is None. + :param name: Optional name for the output node. The default is None. + :return: Node representing the interpolation operation.
+ """ + attrs = { + "mode": mode, + "shape_calculation_mode": shape_calculation_mode, + "coordinate_transformation_mode": coordinate_transformation_mode, + "nearest_mode": nearest_mode, + "antialias": antialias, + "cube_coeff": cube_coeff, + } + + attrs["pads_begin"] = [] if pads_begin is None else pads_begin + attrs["pads_end"] = [] if pads_end is None else pads_end + + inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) + + return _get_node_factory_opset11().create("Interpolate", inputs, attrs) diff --git a/src/bindings/python/src/openvino/runtime/utils/node_factory.py b/src/bindings/python/src/openvino/runtime/utils/node_factory.py index a89c05ab0cffb6..f952bcf90fb4dc 100644 --- a/src/bindings/python/src/openvino/runtime/utils/node_factory.py +++ b/src/bindings/python/src/openvino/runtime/utils/node_factory.py @@ -13,7 +13,7 @@ from openvino.runtime.exceptions import UserInputError -DEFAULT_OPSET = "opset10" +DEFAULT_OPSET = "opset11" class NodeFactory(object): diff --git a/src/bindings/python/src/pyopenvino/graph/node_factory.cpp b/src/bindings/python/src/pyopenvino/graph/node_factory.cpp index bdf7c982b3e8fd..9aed62c2e00a17 100644 --- a/src/bindings/python/src/pyopenvino/graph/node_factory.cpp +++ b/src/bindings/python/src/pyopenvino/graph/node_factory.cpp @@ -79,7 +79,7 @@ class NodeFactory { return it->second(); } - const ov::OpSet& m_opset = ov::get_opset10(); + const ov::OpSet& m_opset = ov::get_opset11(); std::unordered_map> m_variables; }; } // namespace diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index 120f07562d4457..f76ed01641a6d5 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -11,7 +11,8 @@ import openvino.runtime.opset1 as ov_opset1 import openvino.runtime.opset5 as ov_opset5 -import openvino.runtime.opset10 as ov +import openvino.runtime.opset10 as ov_opset10 +import openvino.runtime.opset11 as ov from openvino.runtime import Type np_types = [np.float32, np.int32] @@ -2145,8 +2146,29 @@ def test_interpolate_opset10(dtype, expected_shape, shape_calculation_mode): axes = [2, 3] mode = "cubic" - node = ov.interpolate(image=image_node, output_shape=output_shape, scales=scales, - axes=axes, mode=mode, + node = ov_opset10.interpolate(image=image_node, output_shape=output_shape, scales=scales, + axes=axes, mode=mode, shape_calculation_mode=shape_calculation_mode) + assert node.get_type_name() == "Interpolate" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == expected_shape + + +@pytest.mark.parametrize( + ("expected_shape", "shape_calculation_mode", "input_value"), + [ + ([1, 3, 64, 64], "scales", np.array([1 / 16, 1 / 16], dtype=np.float32)), + ([1, 3, 256, 256], "sizes", np.array([256, 256], dtype=np.int32)), + ], +) +@pytest.mark.parametrize("dtype", np_types) +def test_interpolate_opset11(dtype, expected_shape, shape_calculation_mode, input_value): + + image_shape = [1, 3, 1024, 1024] + image_node = ov.parameter(image_shape, dtype, name="Image") + axes = [2, 3] + mode = "bilinear_pillow" + + node = ov.interpolate(image=image_node, scales_or_sizes=input_value, axes=axes, mode=mode, shape_calculation_mode=shape_calculation_mode) assert node.get_type_name() == "Interpolate" assert node.get_output_size() == 1 diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py 
b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py index 7b084bc77a81d0..09fda90564bd01 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py @@ -11,6 +11,7 @@ import ngraph.opset1 as ng_opset1 import ngraph.opset5 as ng_opset5 import ngraph.opset10 as ng_opset10 +import ngraph.opset11 as ng_opset11 from ngraph.utils.types import make_constant_node from ngraph.exceptions import UserInputError from ngraph.impl import Type @@ -2259,13 +2260,33 @@ def test_interpolate_opset10(dtype, expected_shape, shape_calculation_mode): mode = "cubic" node = ng_opset10.interpolate(image=image_node, output_shape=output_shape, scales=scales, - axes=axes, - mode=mode, shape_calculation_mode=shape_calculation_mode) + axes=axes,mode=mode, shape_calculation_mode=shape_calculation_mode) assert node.get_type_name() == "Interpolate" assert node.get_output_size() == 1 assert list(node.get_output_shape(0)) == expected_shape +@pytest.mark.parametrize( + ("expected_shape", "shape_calculation_mode", "input_value"), + [ + ([1, 3, 64, 64], "scales", np.array([1 / 16, 1 / 16], dtype=np.float32)), + ([1, 3, 256, 256], "sizes", np.array([256, 256], dtype=np.int32)), + ], +) +@pytest.mark.parametrize("dtype", np_types) +def test_interpolate_opset11(dtype, expected_shape, shape_calculation_mode, input_value): + + image_shape = [1, 3, 1024, 1024] + image_node = ng.parameter(image_shape, dtype, name="Image") + axes = [2, 3] + mode = "bilinear_pillow" + + node = ng_opset11.interpolate(image=image_node, scales_or_sizes=input_value, axes=axes, mode=mode, + shape_calculation_mode=shape_calculation_mode) + assert node.get_type_name() == "Interpolate" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == expected_shape + def test_is_finite_opset10(): input_shape = [1, 2, 3, 4] input_node = ng.parameter(input_shape, np.float32, name="InputData") From 6bf2fe11aeb891eb66db37932df281a982f90369 Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Thu, 23 Mar 2023 05:00:29 +0900 Subject: [PATCH 045/296] [GPU] Need to exclude fused mem_dep from shape_infer_dep (#16300) --- .../src/graph/include/program_node.h | 5 ++ .../intel_gpu/src/graph/primitive_inst.cpp | 4 + .../intel_gpu/src/graph/program_node.cpp | 16 ++++ .../test_cases/deconvolution_gpu_test.cpp | 73 +++++++++++++++++++ 4 files changed, 98 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 1f72b49bd8b5d1..4d353c270706ef 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -91,6 +91,9 @@ struct program_node { if (u->get_dependencies().size() <= dep_idx) { continue; } + if (u->is_fused_dep(dep_idx)) { + continue; + } if (u->get_dependency(dep_idx).get_unique_id() == unique_id) { return true; } @@ -99,6 +102,8 @@ struct program_node { return false; } + bool is_fused_dep(size_t dep_idx) const; + std::map get_const_memory_deps() const; virtual std::unique_ptr get_kernel_impl_params() const { diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index aae9e844a4eeb6..4acd2d02c808e6 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -190,6 +190,10 @@ void primitive_inst::update_shape() { } auto& dep = _node->get_dependency(i); auto dep_id = 
dep.id(); + // exclude fused node from memory_deps + if (_node->is_fused_dep(i)) { + break; + } // Events may be not created for in-order queue, so take them for OOO queue only if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) { dependencies_events.push_back(_network.get_primitive_event(dep_id)); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 5b66ad11a25149..70cc56d4420f0a 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -378,6 +378,16 @@ bool program_node::has_padded_dependency() const { }); } +bool program_node::is_fused_dep(size_t dep_idx) const { + for (auto fused : get_fused_primitives()) { + if (dep_idx >= fused.dep_start_idx) { + return true; + } + } + + return false; +} + std::map program_node::get_const_memory_deps() const { std::map mem_deps; for (auto& i : get_shape_infer_dependencies()) { @@ -385,6 +395,12 @@ std::map program_node::get_const_memory_deps() const { if (i >= get_dependencies().size()) continue; + // exclude fused dependency + if (is_fused_dep(i)) { + continue; + } + + // constant type only auto& dep = get_dependency(i); if (dep.is_type()) { mem_deps.insert({i, dep.as().get_attached_memory_ptr()}); diff --git a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp index a218640354ae95..1f47df7623195e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -258,6 +259,78 @@ TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) { } } + +TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad_exclude_fused_mem_dep) { + // Filter : 2x2 + // Input : 2x2 + // Output : 3x3 + // + // Input: + // 8 0.5 + // 6 9 + // + // Filter + // -2 0.5 + // 3.5 1.5 + // + // no bias + // + // + // Output: + // -16.f, 3.f, 0.25f, + // 16.f, -1.25f, 5.25f, + // 21.f, 40.5f, 13.5f + + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); + auto elt_input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 9, 1, 1, 1 } }); + auto in_layout = layout(ov::PartialShape::dynamic(4), data_types::f32, format::yxfb); + + set_values(input, { 8.f, 0.5f, 6.f, 9.f }); + set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); + set_values(elt_input, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f }); + + topology topology( + input_layout("input", in_layout), + input_layout("elt_input", elt_input->get_layout()), + reorder("reordered_input", input_info("input"), this->input_layout_format, data_types::f32), + reorder("reordered_elt_input", input_info("elt_input"), format::bfyx, data_types::f32), + data("weights", weights), + deconvolution("deconv", input_info("reordered_input"), { "weights" }), + eltwise("elt_scale", { input_info("deconv"), input_info("reordered_elt_input") }, eltwise_mode::prod), + reorder("plane_output", input_info("elt_scale"), format::bfyx, data_types::f32) + ); + + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + 
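+ // elt_scale is expected to be fused into deconv when optimize_data is enabled, + // so reordered_elt_input becomes a fused dependency of deconv that update_shape() + // must not treat as a shape-infer memory dependency.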
network.set_input_data("input", input); + network.set_input_data("elt_input", elt_input); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "plane_output"); + + auto output_prim = outputs.begin()->second.get_memory(); + + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); + + std::vector expected_output_vec = { + -16.f, 3.f, 0.25f, + 16.f, -1.25f, 5.25f, + 21.f, 40.5f, 13.5f + }; + + for (unsigned int i = 0; i < expected_output_vec.size(); i++) + { + ASSERT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]); + } +} + TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filter : 2x2 // Input : 2x2 // Output : 3x3 From a205c675db9d65f21427c73f6b8c38c7a2ec616d Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 23 Mar 2023 08:32:36 +0400 Subject: [PATCH 046/296] Fix leftovers after removing plugins.xml (#16487) * Fixed comments * Rename ie_plugins to ov_plugins * Remove dependency from tests --- .../plugins/create_plugins_hpp.cmake | 34 +++++++++---------- cmake/developer_package/plugins/plugins.cmake | 26 +++++++------- .../developer_package/plugins/plugins.hpp.in | 4 +-- cmake/extra_modules.cmake | 4 +-- scripts/setupvars/setupvars.sh | 12 +++---- src/common/util/CMakeLists.txt | 9 ++--- src/common/util/src/file_util.cpp | 3 +- src/inference/CMakeLists.txt | 2 +- src/inference/src/core.cpp | 1 - src/inference/src/dev/core_impl.cpp | 9 ++--- src/inference/src/dev/core_impl.hpp | 2 -- src/inference/src/ie_core.cpp | 1 - src/inference/tests/unit/CMakeLists.txt | 3 -- 13 files changed, 48 insertions(+), 62 deletions(-) diff --git a/cmake/developer_package/plugins/create_plugins_hpp.cmake b/cmake/developer_package/plugins/create_plugins_hpp.cmake index 10adcac6c28f1f..1fedf858ce58ca 100644 --- a/cmake/developer_package/plugins/create_plugins_hpp.cmake +++ b/cmake/developer_package/plugins/create_plugins_hpp.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -foreach(var IE_DEVICE_MAPPING OV_DYNAMIC IE_PLUGINS_HPP_HEADER IE_PLUGINS_HPP_HEADER_IN) +foreach(var OV_DEVICE_MAPPING BUILD_SHARED_LIBS OV_PLUGINS_HPP_HEADER OV_PLUGINS_HPP_HEADER_IN) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} is required, but not defined") endif() @@ -10,11 +10,11 @@ endforeach() # configure variables -set(IE_PLUGINS_DECLARATIONS "") -set(IE_PLUGINS_MAP_DEFINITION +set(OV_PLUGINS_DECLARATIONS "") +set(OV_PLUGINS_MAP_DEFINITION " static const std::map plugins_hpp = {") -foreach(dev_map IN LISTS IE_DEVICE_MAPPING) +foreach(dev_map IN LISTS OV_DEVICE_MAPPING) string(REPLACE ":" ";" dev_map "${dev_map}") list(GET dev_map 0 mapped_dev_name) list(GET dev_map 1 actual_dev_name) @@ -35,30 +35,30 @@ foreach(dev_map IN LISTS IE_DEVICE_MAPPING) set(dev_config "${dev_config}}") - if(NOT OV_DYNAMIC) + if(NOT BUILD_SHARED_LIBS) # common - set(_IE_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") - set(_IE_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") + set(_OV_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") + set(_OV_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") # declarations - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} - IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_PLUGIN_FUNC});") + set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS} + IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_PLUGIN_FUNC});") if(${actual_dev_name}_AS_EXTENSION) - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} - 
IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") + set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS} + IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_EXTENSION_FUNC});") else() - set(_IE_CREATE_EXTENSION_FUNC "nullptr") + set(_OV_CREATE_EXTENSION_FUNC "nullptr") endif() - set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} - { \"${mapped_dev_name}\", Value { ${_IE_CREATE_PLUGIN_FUNC}, ${_IE_CREATE_EXTENSION_FUNC}, ${dev_config} } },") + set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} + { \"${mapped_dev_name}\", Value { ${_OV_CREATE_PLUGIN_FUNC}, ${_OV_CREATE_EXTENSION_FUNC}, ${dev_config} } },") else() - set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} + set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} { \"${mapped_dev_name}\", Value { \"${actual_dev_name}\", ${dev_config} } },") endif() endforeach() -set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} +set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} };\n") -configure_file("${IE_PLUGINS_HPP_HEADER_IN}" "${IE_PLUGINS_HPP_HEADER}" @ONLY) +configure_file("${OV_PLUGINS_HPP_HEADER_IN}" "${OV_PLUGINS_HPP_HEADER}" @ONLY) diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index 7f00cc70269861..0d8db5561e5ada 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -281,9 +281,9 @@ function(ie_target_link_plugins TARGET_NAME) endfunction() # -# ie_generate_plugins_hpp() +# ov_generate_plugins_hpp() # -function(ie_generate_plugins_hpp) +function(ov_generate_plugins_hpp) set(device_mapping) set(device_configs) set(as_extension) @@ -321,22 +321,22 @@ function(ie_generate_plugins_hpp) endif() endforeach() - # add plugins to libraries including ie_plugins.hpp + # add plugins to libraries including ov_plugins.hpp ie_target_link_plugins(openvino) if(TARGET inference_engine_s) ie_target_link_plugins(inference_engine_s) endif() - set(ie_plugins_hpp "${CMAKE_BINARY_DIR}/src/inference/ie_plugins.hpp") + set(ov_plugins_hpp "${CMAKE_BINARY_DIR}/src/inference/ov_plugins.hpp") set(plugins_hpp_in "${IEDevScripts_DIR}/plugins/plugins.hpp.in") - add_custom_command(OUTPUT "${ie_plugins_hpp}" + add_custom_command(OUTPUT "${ov_plugins_hpp}" COMMAND "${CMAKE_COMMAND}" - -D "IE_DEVICE_MAPPING=${device_mapping}" - -D "OV_DYNAMIC=${BUILD_SHARED_LIBS}" - -D "IE_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" - -D "IE_PLUGINS_HPP_HEADER=${ie_plugins_hpp}" + -D "BUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}" + -D "OV_DEVICE_MAPPING=${device_mapping}" + -D "OV_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" + -D "OV_PLUGINS_HPP_HEADER=${ov_plugins_hpp}" ${device_configs} ${as_extension} -P "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" @@ -344,13 +344,13 @@ function(ie_generate_plugins_hpp) "${plugins_hpp_in}" "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" COMMENT - "Generate ie_plugins.hpp for build" + "Generate ov_plugins.hpp for build" VERBATIM) # for some reason dependency on source files does not work # so, we have to use explicit target and make it dependency for inference_engine - add_custom_target(_ie_plugins_hpp DEPENDS ${ie_plugins_hpp}) - add_dependencies(inference_engine_obj _ie_plugins_hpp) + add_custom_target(_ov_plugins_hpp DEPENDS ${ov_plugins_hpp}) + add_dependencies(inference_engine_obj _ov_plugins_hpp) # add dependency for object files get_target_property(sources inference_engine_obj SOURCES) @@ -367,5 +367,5 @@ 
function(ie_generate_plugins_hpp) endforeach() # add dependency on header file generation for all inference_engine source files - set_source_files_properties(${all_sources} PROPERTIES OBJECT_DEPENDS ${ie_plugins_hpp}) + set_source_files_properties(${all_sources} PROPERTIES OBJECT_DEPENDS ${ov_plugins_hpp}) endfunction() diff --git a/cmake/developer_package/plugins/plugins.hpp.in b/cmake/developer_package/plugins/plugins.hpp.in index d351bcfb76f3d0..224f77c8cb980b 100644 --- a/cmake/developer_package/plugins/plugins.hpp.in +++ b/cmake/developer_package/plugins/plugins.hpp.in @@ -11,7 +11,7 @@ #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" -@IE_PLUGINS_DECLARATIONS@ +@OV_PLUGINS_DECLARATIONS@ struct Value { InferenceEngine::CreatePluginEngineFunc * m_create_plugin_func; @@ -33,6 +33,6 @@ using PluginsStaticRegistry = std::map; inline const std::map getCompiledPluginsRegistry() { -@IE_PLUGINS_MAP_DEFINITION@ +@OV_PLUGINS_MAP_DEFINITION@ return plugins_hpp; } diff --git a/cmake/extra_modules.cmake b/cmake/extra_modules.cmake index 7b843341c11159..a5b1cd22e82ccf 100644 --- a/cmake/extra_modules.cmake +++ b/cmake/extra_modules.cmake @@ -169,9 +169,9 @@ ov_generate_dev_package_config() # with all imported developer targets register_extra_modules() -# for static libraries case we need to generate final ie_plugins.hpp +# for static libraries case we need to generate final ov_plugins.hpp # with all the information about plugins -ie_generate_plugins_hpp() +ov_generate_plugins_hpp() # used for static build ov_generate_frontends_hpp() diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh index 41789160e69a59..3cdf4987d732b7 100755 --- a/scripts/setupvars/setupvars.sh +++ b/scripts/setupvars/setupvars.sh @@ -36,15 +36,15 @@ if [ -e "$INSTALLDIR/runtime" ]; then export OpenVINO_DIR=$INSTALLDIR/runtime/cmake system_type=$(ls "$INSTALLDIR/runtime/lib/") - IE_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type + OV_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type if [[ "$OSTYPE" == "darwin"* ]]; then - export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} - export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} - export PKG_CONFIG_PATH=${IE_PLUGINS_PATH}/Release/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} + export DYLD_LIBRARY_PATH=${OV_PLUGINS_PATH}/Release:${OV_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} + export LD_LIBRARY_PATH=${OV_PLUGINS_PATH}/Release:${OV_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + export PKG_CONFIG_PATH=${OV_PLUGINS_PATH}/Release/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} else - export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} - export PKG_CONFIG_PATH=$IE_PLUGINS_PATH/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} + export LD_LIBRARY_PATH=${OV_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + export PKG_CONFIG_PATH=$OV_PLUGINS_PATH/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} fi if [ -e "$INSTALLDIR/runtime/3rdparty/tbb" ]; then diff --git a/src/common/util/CMakeLists.txt b/src/common/util/CMakeLists.txt index 160be0259b8b30..4b538aab7c7461 100644 --- a/src/common/util/CMakeLists.txt +++ b/src/common/util/CMakeLists.txt @@ -24,12 +24,9 @@ endif() # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj -set(MIXED_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/src/file_util.cpp") - -set_property(SOURCE ${MIXED_SRC} - 
APPEND PROPERTY INCLUDE_DIRECTORIES - $) +set_source_files_properties( + "${CMAKE_CURRENT_SOURCE_DIR}/src/file_util.cpp" + PROPERTIES COMPILE_DEFINITIONS OpenVINO_VERSION="${OpenVINO_VERSION}") source_group("src" FILES ${LIBRARY_SRC}) source_group("include" FILES ${PUBLIC_HEADERS}) diff --git a/src/common/util/src/file_util.cpp b/src/common/util/src/file_util.cpp index f39f2dd3c677d9..0de2ab70d377b4 100644 --- a/src/common/util/src/file_util.cpp +++ b/src/common/util/src/file_util.cpp @@ -12,7 +12,6 @@ #include #include -#include "openvino/core/version.hpp" #include "openvino/util/common_util.hpp" #ifdef _WIN32 @@ -512,7 +511,7 @@ ov::util::FilePath ov::util::get_compiled_plugin_path(const std::string& plugin) // 1. in openvino-X.Y.Z folder relative to libopenvino.so std::ostringstream str; - str << "openvino-" << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH; + str << "openvino-" << OpenVINO_VERSION; const auto sub_folder = str.str(); std::string abs_file_path = ov::util::path_join({ov_library_path, sub_folder, plugin}); diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index ca27952acad912..25d4272b3d3620 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -152,7 +152,7 @@ target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" - # for static ie_plugins.hpp + # for static ov_plugins.hpp "${CMAKE_CURRENT_BINARY_DIR}" # for ie_ir_version.hpp $<$:$> diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp index fef2652b275d17..45c9e9665efb17 100644 --- a/src/inference/src/core.cpp +++ b/src/inference/src/core.cpp @@ -9,7 +9,6 @@ #include "dev/converter_utils.hpp" #include "dev/core_impl.hpp" #include "ie_itt.hpp" -#include "ie_plugins.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "so_extension.hpp" diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index d97a89f8f79411..3e696487e9d6a1 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -36,6 +36,7 @@ #include "openvino/util/common_util.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" +#include "ov_plugins.hpp" #include "preprocessing/preprocessing.hpp" #include "xml_parse_utils.h" @@ -316,12 +317,12 @@ void ov::CoreImpl::register_compile_time_plugins() { std::lock_guard lock(get_mutex()); const decltype(::getCompiledPluginsRegistry())& plugins = getCompiledPluginsRegistry(); -#ifdef OPENVINO_STATIC_LIBRARY for (const auto& plugin : plugins) { const auto& deviceName = plugin.first; if (deviceName.find('.') != std::string::npos) { OPENVINO_THROW("Device name must not contain dot '.' 
symbol"); } +#ifdef OPENVINO_STATIC_LIBRARY if (pluginRegistry.find(deviceName) == pluginRegistry.end()) { const auto& value = plugin.second; ov::AnyMap config = any_copy(value.m_default_config); @@ -329,20 +330,16 @@ void ov::CoreImpl::register_compile_time_plugins() { pluginRegistry[deviceName] = desc; add_mutex(deviceName); } - } #else - for (const auto& plugin : plugins) { - const auto& deviceName = plugin.first; const auto& pluginPath = ov::util::get_compiled_plugin_path(plugin.second.m_plugin_path); - if (pluginRegistry.find(deviceName) == pluginRegistry.end() && ov::util::file_exists(pluginPath)) { ov::AnyMap config = any_copy(plugin.second.m_default_config); PluginDescriptor desc{pluginPath, config}; pluginRegistry[deviceName] = desc; add_mutex(deviceName); } - } #endif + } } void ov::CoreImpl::register_plugins_in_registry(const std::string& xml_config_file, const bool& by_abs_path) { diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 8fe7768dc6c91a..c711e416484050 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -15,7 +15,6 @@ #include "ie_cache_manager.hpp" #include "ie_extension.h" #include "ie_icore.hpp" -#include "ie_plugins.hpp" #include "multi-device/multi_device_config.hpp" #include "openvino/core/any.hpp" #include "openvino/core/extension.hpp" @@ -23,7 +22,6 @@ #include "openvino/runtime/common.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/threading/executor_manager.hpp" -#include "openvino/util/file_util.hpp" namespace ov { diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index de604f6fab4f21..139c12d763145c 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -30,7 +30,6 @@ #include "ie_network_reader.hpp" #include "ie_ngraph_utils.hpp" #include "ie_plugin_config.hpp" -#include "ie_plugins.hpp" #include "ie_remote_context.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/ngraph.hpp" diff --git a/src/inference/tests/unit/CMakeLists.txt b/src/inference/tests/unit/CMakeLists.txt index 36889d8548ede8..02fed6cfd7756a 100644 --- a/src/inference/tests/unit/CMakeLists.txt +++ b/src/inference/tests/unit/CMakeLists.txt @@ -11,9 +11,6 @@ ov_add_test_target( template_extension LINK_LIBRARIES unitTestUtils - INCLUDES - # for static ie_plugins.hpp - "${CMAKE_BINARY_DIR}/src/inference/" ADD_CLANG_FORMAT LABELS OV From a20b3631fb5d253bde84d82087cb11b3dee78c59 Mon Sep 17 00:00:00 2001 From: Jade Cho Date: Thu, 23 Mar 2023 13:55:55 +0900 Subject: [PATCH 047/296] Support float64 data type as input of benchmark_app (#16435) --- samples/cpp/benchmark_app/remote_tensors_filling.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/cpp/benchmark_app/remote_tensors_filling.cpp b/samples/cpp/benchmark_app/remote_tensors_filling.cpp index fa139d7485f141..9301a8d113210f 100644 --- a/samples/cpp/benchmark_app/remote_tensors_filling.cpp +++ b/samples/cpp/benchmark_app/remote_tensors_filling.cpp @@ -40,8 +40,10 @@ void fill_buffer_random(void* inputBuffer, void fill_buffer(void* inputBuffer, size_t elementsNum, const ov::element::Type& type) { if (type == ov::element::f32) { fill_buffer_random(inputBuffer, elementsNum); + } else if (type == ov::element::f64) { + fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::f16) { - fill_buffer_random(inputBuffer, elementsNum); + fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::i32) { 
fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::i64) { From 17174a3839f4fb85350b499c620e18facc271474 Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Thu, 23 Mar 2023 08:39:46 +0100 Subject: [PATCH 048/296] DOCS shift to rst - Troubleshooting (#16483) * troubleshooting * code-block fix --- .../installing-openvino-yocto.md | 2 +- docs/install_guides/troubleshooting-issues.md | 75 ++++++++++------ docs/install_guides/troubleshooting-steps.md | 89 +++++++++++-------- docs/install_guides/troubleshooting.md | 12 ++- 4 files changed, 104 insertions(+), 74 deletions(-) diff --git a/docs/install_guides/installing-openvino-yocto.md b/docs/install_guides/installing-openvino-yocto.md index 2aa69d56d3db52..479d03f2e87c49 100644 --- a/docs/install_guides/installing-openvino-yocto.md +++ b/docs/install_guides/installing-openvino-yocto.md @@ -116,7 +116,7 @@ If the image build is successful, it will return the list of packages as below: Additional Resources #################### -- :ref:`Troubleshooting Guide ` +- :ref:`Troubleshooting Guide ` - `Yocto Project `__ - official documentation webpage - `BitBake Tool `__ - `Poky `__ diff --git a/docs/install_guides/troubleshooting-issues.md b/docs/install_guides/troubleshooting-issues.md index fd539d6ea64845..a381f0f46c1517 100644 --- a/docs/install_guides/troubleshooting-issues.md +++ b/docs/install_guides/troubleshooting-issues.md @@ -1,52 +1,71 @@ # Issues & Solutions for OpenVINO™ Installation & Configuration {#openvino_docs_get_started_guide_troubleshooting_issues} +@sphinxdirective + This page lists issues that you may encounter during the installation and configuration of OpenVINO™, as well as their possible solutions. -## Errors with Installing via PIP for Users in China +.. _install_for_prc: + +Errors with Installing via PIP for Users in China +################################################# Users in China might encounter errors while downloading sources via PIP during OpenVINO™ installation. To resolve the issues, try one of the following options: * Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example: - ``` sh - pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ - ``` - Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. - You can also run the following command to install specific framework. For example: + .. code-block:: sh + + pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ + + Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. + You can also run the following command to install a specific framework. For example: + + .. code-block:: sh + + pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ - ``` - pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ - ``` * For C++ developers, if you have installed OpenVINO Runtime via APT, YUM, or the archive file, and then installed OpenVINO Development Tools via PyPI, you may run into issues. To resolve that, install the components in ``requirements.txt`` by using the following command: - ``` sh - pip install -r /tools/requirements.txt - ``` - For APT and YUM users, replace the `INSTALL_DIR` with `/usr/share/openvino`. + + .. code-block:: sh + + pip install -r <INSTALL_DIR>/tools/requirements.txt + + For APT and YUM users, replace the ``INSTALL_DIR`` with ``/usr/share/openvino``.
-## Issues with Installing OpenVINO on Linux from Docker +Issues with Installing OpenVINO on Linux from Docker +#################################################### + +.. _proxy-issues: -### Proxy Issues +Proxy Issues +++++++++++++ -If you met proxy issues during the installation with Docker, you need set up proxy settings for Docker. See the [Docker guide](https://docs.docker.com/network/proxy/) for more details. +If you encounter proxy issues during the installation with Docker, you need to set up proxy settings for Docker. See the `Docker guide `__ for more details. +.. _yocto_install_issues: -@anchor yocto-install-issues -## Issues with Creating a Yocto Image for OpenVINO +Issues with Creating a Yocto Image for OpenVINO +############################################### -### Error while adding "meta-intel" layer +Error while adding "meta-intel" layer ++++++++++++++++++++++++++++++++++++++ -When using the `bitbake-layers add-layer meta-intel` command, the following error might occur: -```sh -NOTE: Starting bitbake server... -ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd -``` +When using the ``bitbake-layers add-layer meta-intel`` command, the following error might occur: -To resolve the issue, install the `chrpath diffstat zstd` tools: +.. code-block:: sh + + NOTE: Starting bitbake server... + ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd + + +To resolve the issue, install the ``chrpath diffstat zstd`` tools: + +.. code-block:: sh + + sudo apt-get install chrpath diffstat zstd -```sh -sudo apt-get install chrpath diffstat zstd -``` +@endsphinxdirective diff --git a/docs/install_guides/troubleshooting-steps.md b/docs/install_guides/troubleshooting-steps.md index ec4049f445325b..fd5f03c73b21d2 100644 --- a/docs/install_guides/troubleshooting-steps.md +++ b/docs/install_guides/troubleshooting-steps.md @@ -1,56 +1,69 @@ # Troubleshooting Steps for OpenVINO™ Installation and Configurations {#openvino_docs_get_started_guide_troubleshooting_steps} +@sphinxdirective + If you run into issues while installing or configuring OpenVINO™, you can try the following methods to do some quick checks first. -## Check the versions of OpenVINO Runtime and Developement Tools +Check the versions of OpenVINO Runtime and Development Tools +############################################################# * To check the version of OpenVINO Development Tools, use the following command: - ```sh - mo --version - ``` + + .. code-block:: sh + + mo --version + * To check the version of OpenVINO Runtime, use the following code: - ```sh - from openvino.runtime import get_version get_version() - ``` + + .. code-block:: sh + + from openvino.runtime import get_version get_version() + -## Check the versions of Python and PIP +Check the versions of Python and PIP +#################################### -To check your Python version, run `python -VV` or `python --version`. The supported Python versions should be 64-bit and between 3.7 and 3.10. If you are using Python 3.6, you are recommended to upgrade the version to 3.7 or higher. +To check your Python version, run ``python -VV`` or ``python --version``. The supported Python versions should be 64-bit and between 3.7 and 3.10. If you are using Python 3.6, it is recommended to upgrade to 3.7 or higher.
If your Python version does not meet the requirements, update Python: -* For Windows, **do not install Python from a Windows Store** as it can cause issues. You are highly recommended to install Python from . -* For Linux and macOS systems, download and install a proper Python version from . See the [Python Beginners' Guide](https://wiki.python.org/moin/BeginnersGuide/Download) for more information on selecting a version. Note that macOS 10.x comes with python 2.7 installed, which is not supported, so you must install Python from the official website. +* For Windows, **do not install Python from a Windows Store** as it can cause issues. You are highly recommended to install Python from `official website `__ . +* For Linux and macOS systems, download and install a proper Python version from `official website `__ . See the `Python Beginners' Guide `__ for more information on selecting a version. Note that macOS 10.x comes with python 2.7 installed, which is not supported, so you must install Python from the official website. For PIP, make sure that you have installed the latest version. To check and upgrade your PIP version, run the following command: -```sh -python -m pip install --upgrade pip -``` + +.. code-block:: sh + + python -m pip install --upgrade pip - -## Check if required external dependencies are installed (for pre-2022.2 releases) +Check if required external dependencies are installed (for pre-2022.2 releases) +############################################################################### For OpenVINO releases prior to 2022.2: -- If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to [install OpenVINO Development Tools](installing-model-dev-tools.md), make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. -- For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its [official support page](https://docs.microsoft.com/en-us/visualstudio/install/modify-visual-studio?view=vs-2019). -## Check if environment variables are set correctly +- If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to :doc:`install OpenVINO Development Tools `, make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. +- For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its `official support page `__ . -- For Python developers, if you previously installed OpenVINO using the archive file, and are now installing OpenVINO using PIP, remove all the PATH settings and the lines with `setupvars` from `.bashrc`. 
Note that if you installed OpenVINO with PIP in a virtual environment, you don't need to set any environment variables. -- If you have installed OpenVINO before, you probably have added `setupvars` to your `PATH /.bashrc` or Windows environment variables. After restarting your environment, you should see similar information as below: -```sh -[setupvars.sh] OpenVINO™ environment initialized -``` - - If you don't see the information above, your PATH variables may be configured incorrectly. Check if you have typed the correct or you are trying to activate in the correct directory. - - If you added it to a `.bashrc` file, make sure that the command is correctly written and the file is found in the `~/.bashrc` folder. +- If you have installed OpenVINO before, you probably have added ``setupvars`` to your ``PATH /.bashrc`` or Windows environment variables. After restarting your environment, you should see similar information as below: + .. code-block:: sh + + [setupvars.sh] OpenVINO™ environment initialized + + + - If you don't see the information above, your PATH variables may be configured incorrectly. Check if you have typed the correct ``<INSTALL_DIR>`` or you are trying to activate in the correct directory. + - If you added it to a ``.bashrc`` file, make sure that the command is correctly written and the file is found in the ``~/.bashrc`` folder. + +Verify that OpenVINO is correctly installed +########################################### * For Python developers, to verify if OpenVINO is correctly installed, use the following command: @@ -66,24 +79,24 @@ For OpenVINO releases prior to 2022.2: * If you installed OpenVINO Runtime from YUM, use the ``yum list installed 'openvino*'`` command to list the installed OpenVINO packages. -@endsphinxdirective - -## Check if GPU drvier is installed +Check if GPU driver is installed +################################ -[Additional configurations](configurations-header.md) may be required in order to use OpenVINO with different hardware such as Intel® GPUs. +:doc:`Additional configurations ` may be required in order to use OpenVINO with different hardware such as Intel® GPUs. -To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see [additional configurations for GPU](configurations-for-intel-gpu.md). +To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see :doc:`additional configurations for GPU `. -## Check firewall and network settings +Check firewall and network settings +################################### Make sure that your firewall and network settings are configured correctly. For example, consider configuring system-wide proxy settings and specifying options for using PIP behind the proxy: -@sphinxdirective +.. code-block:: sh + + pip install --proxy http://address:port --trusted-host pypi.org openvino - .. 
code-block:: sh - pip install --proxy http://address:port --trusted-host pypi.org openvino +For specific issues, see :ref:`Errors with Installing via PIP for Users in China ` and :ref:`proxy issues with installing OpenVINO on Linux from Docker `. @endsphinxdirective -For specific issues, see Errors with Installing via PIP for Users in China and proxy issues with installing OpenVINO on Linux from Docker. \ No newline at end of file diff --git a/docs/install_guides/troubleshooting.md b/docs/install_guides/troubleshooting.md index 9963a579978025..99e3fd7ca8ea97 100644 --- a/docs/install_guides/troubleshooting.md +++ b/docs/install_guides/troubleshooting.md @@ -9,16 +9,14 @@ Issues & Solutions Troubleshooting Steps -@endsphinxdirective - -@sphinxdirective .. _troubleshooting guide for install: -@endsphinxdirective - This guide provides general troubleshooting steps and solutions to possible issues that can be encountered while installing and configuring OpenVINO™. -The [Issues & Solutions](./troubleshooting-issues.md) page lists common installation and configuration errors, and their possible solutions. If you encountered a specific error while installing or configuring OpenVINO, check this page to see if there is a solution. +The :doc:`Issues & Solutions ` page lists common installation and configuration errors, and their possible solutions. If you encounter a specific error while installing or configuring OpenVINO, check this page to see if there is a solution. + +The :doc:`Troubleshooting Steps ` page provides a set of instructions for diagnosing and resolving installation and configuration issues. If you had problems during installation and configuration, walk through these steps to try and resolve your issue. + +@endsphinxdirective -The [Troubleshooting Steps](./troubleshooting-steps.md) page provides a set of instructions for diagnosing and resolving installation and configuration issues. If you had problems during installation and configuration, walk through these steps to try and resolve your issue. From aaa4a4c2105f0e9268ecf0dbd16d59e9cf64ca4c Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Thu, 23 Mar 2023 11:49:46 +0400 Subject: [PATCH 049/296] [TF FE] Skip Assert operation and add test (#16484) At the conversion stage we can't resolve the Assert node because its condition is computed only at inference time.
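For example, a graph of the following kind (a minimal sketch in the spirit of the new test below; exact API usage may vary) carries an Assert whose condition depends on runtime input shapes and therefore cannot be constant-folded at conversion time:

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
x = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='x')
y = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='y')
# The condition compares runtime shapes, so its value is unknown until inference.
cond = tf.reduce_all(tf.equal(tf.shape(x), tf.shape(y)))
assert_op = tf.debugging.Assert(cond, ["Shapes of operands are incompatible"])
with tf.control_dependencies([assert_op]):
    z = tf.add(x, y)
```

Skipping the node (instead of demanding a constant condition) lets the rest of the graph convert normally.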
Signed-off-by: Kazantsev, Roman --- .../tensorflow/src/op/lookup_table_insert.cpp | 24 ------------ src/frontends/tensorflow/src/op_table.cpp | 7 ++-- .../tests/convert_tricky_models.cpp | 11 ++++++ .../gen_scripts/generate_model_with_assert.py | 38 +++++++++++++++++++ .../include/common_op_table.hpp | 1 - .../tensorflow_common/src/op/assert.cpp | 36 ------------------ .../tensorflow_common/src/op/no_op.cpp | 2 +- 7 files changed, 53 insertions(+), 66 deletions(-) delete mode 100644 src/frontends/tensorflow/src/op/lookup_table_insert.cpp create mode 100644 src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py delete mode 100644 src/frontends/tensorflow_common/src/op/assert.cpp diff --git a/src/frontends/tensorflow/src/op/lookup_table_insert.cpp b/src/frontends/tensorflow/src/op/lookup_table_insert.cpp deleted file mode 100644 index 3fb679e170be38..00000000000000 --- a/src/frontends/tensorflow/src/op/lookup_table_insert.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "common_op_table.hpp" -#include "openvino/frontend/tensorflow/node_context.hpp" -#include "utils.hpp" - -namespace ov { -namespace frontend { -namespace tensorflow { -namespace op { - -OutputVector translate_lookup_table_insert_op(const ov::frontend::tensorflow::NodeContext& node) { - // auto-pruning of unsupported sub-graphs that contain - // operations working with dictionaries - default_op_checks(node, 3, {"LookupTableInsert", "LookupTableInsertV2"}); - return {}; -} - -} // namespace op -} // namespace tensorflow -} // namespace frontend -} // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index dddab20848841f..26a2a332345f99 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -26,7 +26,6 @@ TF_OP_CONVERTER(translate_gru_block_cell_op); TF_OP_CONVERTER(translate_hash_table_op); TF_OP_CONVERTER(translate_iterator_get_next_op); TF_OP_CONVERTER(translate_iterator_op); -TF_OP_CONVERTER(translate_lookup_table_insert_op); TF_OP_CONVERTER(translate_partitioned_call_op); TF_OP_CONVERTER(translate_queue_dequeue_op); TF_OP_CONVERTER(translate_queue_dequeue_many_op); @@ -105,7 +104,7 @@ const std::map get_supported_ops() { {"AddN", translate_add_n_op}, {"ArgMax", translate_arg_max_op}, {"ArgMin", translate_arg_min_op}, - {"Assert", translate_assert_op}, + {"Assert", translate_no_op}, {"AvgPool", translate_avg_pool_op}, {"AvgPool3D", translate_avg_pool_op}, {"BatchMatMul", translate_batch_mat_mul_op}, @@ -164,8 +163,8 @@ const std::map get_supported_ops() { {"ListDiff", translate_list_diff_op}, {"LogSoftmax", translate_log_softmax_op}, {"Log1p", translate_log_1p_op}, - {"LookupTableInsert", translate_lookup_table_insert_op}, - {"LookupTableInsertV2", translate_lookup_table_insert_op}, + {"LookupTableInsert", translate_no_op}, + {"LookupTableInsertV2", translate_no_op}, {"LRN", translate_lrn_op}, {"MatMul", translate_mat_mul_op}, {"MatrixDiag", translate_matrix_diag_op}, diff --git a/src/frontends/tensorflow/tests/convert_tricky_models.cpp b/src/frontends/tensorflow/tests/convert_tricky_models.cpp index 6dd2a5a510b325..e1bde1af03784f 100644 --- a/src/frontends/tensorflow/tests/convert_tricky_models.cpp +++ b/src/frontends/tensorflow/tests/convert_tricky_models.cpp @@ -346,6 +346,7 @@ TEST_F(TransformationTestsF, ModelWithIteratorGetNextAndUnsupportedOp) { model_ref = make_shared(OutputVector{add}, 
ParameterVector{x, y}); } } + TEST_F(TransformationTestsF, ModelWithMultioutputBodyGraphNode) { { model = convert_model("partitioned_call2/partitioned_call2.pb"); } { @@ -376,3 +377,13 @@ TEST_F(TransformationTestsF, ModelWithEmptyTensorListAndPushBack) { model_ref = make_shared(OutputVector{recover_item}, ParameterVector{x}); } } + +TEST_F(TransformationTestsF, ModelWithAssertNode) { + { model = convert_model("model_with_assert/model_with_assert.pb"); } + { + auto x = make_shared(i32, PartialShape{Dimension::dynamic()}); + auto y = make_shared(i32, PartialShape{Dimension::dynamic()}); + auto add = make_shared(x, y); + model_ref = make_shared(OutputVector{add}, ParameterVector{x, y}); + } +} diff --git a/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py new file mode 100644 index 00000000000000..79c6d84bf2a2c5 --- /dev/null +++ b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# +# model with Assert node generator +# + +import os +import sys + +import numpy as np +import tensorflow as tf + + +def main(): + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='x') + y = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='y') + tf.raw_ops.AddV2(x=x, y=y) + shape1 = tf.raw_ops.Shape(input=x) + shape2 = tf.raw_ops.Shape(input=y) + equal = tf.raw_ops.Equal(x=shape1, y=shape2) + axis = tf.constant([0], dtype=tf.int32) + all_equal = tf.raw_ops.All(input=equal, axis=axis) + message = tf.constant("Shapes of operands are incompatible", dtype=tf.string) + tf.raw_ops.Assert(condition=all_equal, data=[message]) + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + tf.io.write_graph(tf_net, os.path.join(sys.argv[1], "model_with_assert"), "model_with_assert.pb", False) + + +if __name__ == "__main__": + main() diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index eeec128e8c1de3..20fefeabd811b4 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -34,7 +34,6 @@ OP_T_CONVERTER(translate_direct_reduce_op); OP_CONVERTER(translate_add_n_op); OP_CONVERTER(translate_arg_max_op); OP_CONVERTER(translate_arg_min_op); -OP_CONVERTER(translate_assert_op); OP_CONVERTER(translate_avg_pool_op); OP_CONVERTER(translate_batch_mat_mul_op); OP_CONVERTER(translate_batch_to_space_nd_op); diff --git a/src/frontends/tensorflow_common/src/op/assert.cpp b/src/frontends/tensorflow_common/src/op/assert.cpp deleted file mode 100644 index 5275e85a8c2edc..00000000000000 --- a/src/frontends/tensorflow_common/src/op/assert.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "common_op_table.hpp" -#include "openvino/core/validation_util.hpp" - -using namespace std; - -namespace ov { -namespace frontend { -namespace tensorflow { -namespace op { - -OutputVector translate_assert_op(const NodeContext& node) { - default_op_checks(node, 1, {"Assert"}); - auto cond = node.get_input(0); - auto cond_const = get_constant_from_source(cond); - TENSORFLOW_OP_VALIDATION(node, - 
cond_const, - "[TensorFlow Frontend] The condition must be constant for further model conversion."); - auto cond_values = cond_const->cast_vector(); - TENSORFLOW_OP_VALIDATION(node, - cond_values.size() == 1, - "[TensorFlow Frontend] Incorrect model - the condition must have one element."); - TENSORFLOW_OP_VALIDATION(node, - cond_values[0], - "[TensorFlow Frontend] The condition must be true for further model conversion."); - return {}; -} -} // namespace op -} // namespace tensorflow -} // namespace frontend -} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/no_op.cpp b/src/frontends/tensorflow_common/src/op/no_op.cpp index 9d8552b6f71c24..388d5c5c6af573 100644 --- a/src/frontends/tensorflow_common/src/op/no_op.cpp +++ b/src/frontends/tensorflow_common/src/op/no_op.cpp @@ -15,7 +15,7 @@ namespace op { OutputVector translate_no_op(const NodeContext& node) { // the operation does nothing in terms of data generation - default_op_checks(node, 0, {"NoOp", "SaveV2"}); + default_op_checks(node, 0, {"NoOp", "SaveV2", "Assert", "LookupTableInsert", "LookupTableInsertV2"}); return {}; } } // namespace op From 66ae71454aa20592b5114cdd69d43e046f2cc70a Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 10:09:43 +0100 Subject: [PATCH 050/296] DOCS shift to rst - Install OpenVINO on Windows (#16502) --- .../installing-openvino-docker-windows.md | 264 +++++++++++------- ...nstalling-openvino-from-archive-windows.md | 224 +++++++++------ .../installing-openvino-windows-header.md | 14 +- 3 files changed, 305 insertions(+), 197 deletions(-) diff --git a/docs/install_guides/installing-openvino-docker-windows.md b/docs/install_guides/installing-openvino-docker-windows.md index b42034cb7bc9b2..d10bf08df67a2e 100644 --- a/docs/install_guides/installing-openvino-docker-windows.md +++ b/docs/install_guides/installing-openvino-docker-windows.md @@ -1,97 +1,124 @@ # Install Intel® Distribution of OpenVINO™ toolkit for Windows from Docker Image {#openvino_docs_install_guides_installing_openvino_docker_windows} +@sphinxdirective + This guide provides steps for creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Windows and using the Docker image on different devices. -## System Requirements +.. _system-requirements-docker-windows: + +System Requirements +#################### + -@sphinxdirective .. tab:: Target Operating System with Python Versions - +------------------------------------+--------------------------+ - | Operating System | Supported Python Version | - +====================================+==========================+ - | Windows Server Core base LTSC 2019 | 3.8 | - +------------------------------------+--------------------------+ - | Windows 10, version 20H2 | 3.8 | - +------------------------------------+--------------------------+ + .. list-table:: + :header-rows: 1 + + * - Operating System + - Supported Python Version + * - Windows Server Core base LTSC 2019 + - 3.8 + * - Windows 10, version 20H2 + - 3.8 .. 
tab:: Host Operating Systems - * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions - * Windows Server 2016 or higher + * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions + * Windows Server 2016 or higher -@endsphinxdirective -### Additional Requirements for GPU +Additional Requirements for GPU ++++++++++++++++++++++++++++++++ To use GPU Acceleration in Windows containers, make sure that the following requirements for Windows host, OpenVINO and Docker are met: -- [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration): +- `Windows requirements `__: + - The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher. - - The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported. + - The container base image must be ``mcr.microsoft.com/windows:1809`` or higher. Windows Server Core and Nano Server container images are not currently supported. - The container host must be running Docker Engine 19.03 or higher. - The container host must have GPU running display drivers of version WDDM 2.5 or higher. + - GPU requirement for OpenVINO: Intel Graphics Driver for Windows of version 15.65 or higher. -- [Docker isolation mode requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container): +- `Docker isolation mode requirements `__: + - Windows host and container version tags must match. - - [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). + - `Windows host and container isolation process support `__. -## Installation Flow +Installation Flow +#################### There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs: + * Use a prebuilt image. Do the following steps: - 1. Get a prebuilt image from provided sources. - 2. Run the image on different devices. + + 1. `Get a prebuilt image from provided sources <#getting-a-prebuilt-image-from-provided-sources>`__. + 2. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__. + * If you want to customize your image, you can also build a Docker image manually by using the following steps: - 1. Prepare a Dockerfile. - 2. Configure the Docker image. - 3. Run the image on different devices. -## Getting a Prebuilt Image from Provided Sources + 1. `Prepare a Dockerfile <#preparing-a-dockerfile>`__. + 2. `Configure the Docker image <#configuring-the-docker-image-for-different-devices>`__. + 3. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__. 
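For illustration, the prebuilt-image route usually comes down to pulling the image and starting a container (a minimal sketch; the repository, image name, and tag below are placeholders rather than real published tags — the actual names are listed on the sources in the next section):

.. code-block:: bat

   rem Pull a prebuilt OpenVINO image and start an interactive container.
   docker pull <repository>/<image>:<tag>
   docker run -it --rm <repository>/<image>:<tag>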
+ +Getting a Prebuilt Image from Provided Sources +############################################## You can find prebuilt images on: -- [Docker Hub](https://hub.docker.com/u/openvino) -- [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino) +- `Docker Hub <https://hub.docker.com/u/openvino>`__ +- `Azure Marketplace <https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino>`__ -## Preparing a Dockerfile +Preparing a Dockerfile +###################### -You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. +You can use the `available Dockerfiles on GitHub <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__ or generate a Dockerfile with your settings via the `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__, which can generate a Dockerfile and then build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. -## Configuring the Docker Image for Different Devices +Configuring the Docker Image for Different Devices +################################################## -### Installing Additional Dependencies for CPU +Installing Additional Dependencies for CPU +++++++++++++++++++++++++++++++++++++++++++ -#### Installing CMake +Installing CMake +---------------- + +To add CMake to the image, add the following commands to the Dockerfile: + +.. code-block:: bat - To add CMake to the image, add the following commands to the Dockerfile: - ```bat RUN powershell.exe -Command ` Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; ` Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; ` Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%" - ``` - In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. Then build a Docker image: - ```bat + +In case of proxy issues, please add the ``ARG HTTPS_PROXY`` and ``-Proxy %%HTTPS_PROXY%`` settings to the ``powershell.exe`` command in the Dockerfile. Then build a Docker image: + +.. code-block:: bat + docker build . -t ` --build-arg HTTPS_PROXY= - ``` - -#### Installing Microsoft Visual Studio Build Tools - You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the [offline](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019) or [online](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019) installers for Build Tools. - - Microsoft Visual Studio Build Tools are licensed as a supplement your existing Microsoft Visual Studio license. - - Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses. - To add MSBuild 2019 to the image, add the following commands to the Dockerfile: - ```bat - RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe +Installing Microsoft Visual Studio Build Tools +---------------------------------------------- + +You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the `offline <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__ or `online <https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019>`__ installers for Build Tools. 
+ +Microsoft Visual Studio Build Tools are licensed as a supplement to your existing Microsoft Visual Studio license. + +Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses. + +To add MSBuild 2019 to the image, add the following commands to the Dockerfile: + +.. code-block:: bat + + RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe + RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache ` --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" ` --add Microsoft.VisualStudio.Workload.MSBuildTools ` @@ -101,75 +128,110 @@ You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoo --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 ` --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 ` --remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned - ``` - In case of proxy issues, please use the [offline installer for Build Tools](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019). -### Configuring the Image for GPU +In case of proxy issues, please use the `offline installer for Build Tools <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__. + +Configuring the Image for GPU ++++++++++++++++++++++++++++++ + +.. note:: + + Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and do the following steps to build the image manually. + +1. Reuse one of `available Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__. You can also use your own Dockerfile. +2. Check your `Windows host and container isolation process compatibility <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility>`__. +3. Find the appropriate Windows container base image on `DockerHub <https://hub.docker.com/_/microsoft-windows>`__ and set up your host/container version in the ``FROM`` Dockerfile instruction. + + For example, in the ``openvino_c_dev_.dockerfile``, change: + + .. code-block:: bat -> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure the Additional Requirements for GPU in System Requirements are met, and do the following steps to build the image manually. + FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base -1. Reuse one of [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles). You can also use your own Dockerfile. -2. Check your [Windows host and container isolation process compatibility](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). -3. Find the appropriate Windows container base image on [DockerHub](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction. - For example, in the `openvino_c_dev_.dockerfile`, change: - ```bat - FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base - ``` to: - ```bat - FROM mcr.microsoft.com/windows:20H2 - ``` + + .. code-block:: bat + + FROM mcr.microsoft.com/windows:20H2 + + 4. Build the Docker image by running the following command: - ```bat - docker build --build-arg package_url= -f -t . - ``` -5. 
Copy `OpenCL.dll` from your `C:\Windows\System32` host folder to any `temp` directory: - ```bat - mkdir C:\tmp - copy C:\Windows\System32\OpenCL.dll C:\tmp - ``` -## Running the Docker Image on Different Devices + .. code-block:: bat + + docker build --build-arg package_url= -f -t . + + +5. Copy ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder to any ``temp`` directory: + + .. code-block:: bat + + mkdir C:\tmp + copy C:\Windows\System32\OpenCL.dll C:\tmp -### Running the Image on CPU + +Running the Docker Image on Different Devices +############################################# + +Running the Image on CPU +++++++++++++++++++++++++ To start the interactive session, run the following command: -```bat -docker run -it --rm -``` + +.. code-block:: bat + + docker run -it --rm + If you want to try some samples, run the image with the following command: -```bat -docker run -it --rm -cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU" -``` -### Running the Image on GPU +.. code-block:: bat + + docker run -it --rm + cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU" + + +Running the Image on GPU +++++++++++++++++++++++++ + +.. note:: -> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure the Additional Requirements for GPU in System Requirements are met, and configure and build the image manually before you can run inferences on a GPU. + Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and `configure and build the image manually <#configuring-the-image-for-gpu>`__ before you can run inferences on a GPU. 1. To try inference on a GPU, run the image with the following command: - ```bat - docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp - ``` + + .. code-block:: bat + + docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp + + where - - `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device. - - `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to OpenCL driver home directory. To find it on your PC, run the `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*` regular expression. 
- - `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder. -2. Copy `OpenCL.dll` to the `C:\Windows\System32` folder inside the container and set appropriate registry entry. Now you can run inference on a GPU device: - ```bat - copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 - ``` - For example, run the `Hello Classification Python` sample with the following command: - ```bat - omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU - ``` - - -## Additional Resources - -- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. -- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file + + - ``--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599`` is a reserved interface class GUID for a GPU device. + - ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409`` is the path to the OpenCL driver home directory. To find it on your PC, run the ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*`` regular expression. + - ``C:\tmp`` is the folder with the copy of ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder. + +2. Copy ``OpenCL.dll`` to the ``C:\Windows\System32`` folder inside the container and set the appropriate registry entry. Now you can run inference on a GPU device: + + .. code-block:: bat + + copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 + + + For example, run the ``Hello Classification Python`` sample with the following command: + + .. code-block:: bat + + omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU + + +Additional Resources +#################### + +- `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. 
+- Intel® Distribution of OpenVINO™ toolkit home page: `https://software.intel.com/en-us/openvino-toolkit <https://software.intel.com/en-us/openvino-toolkit>`__ +- `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__ + +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-from-archive-windows.md b/docs/install_guides/installing-openvino-from-archive-windows.md index 2300a1dceb8423..a0bc2b7649c94e 100644 --- a/docs/install_guides/installing-openvino-from-archive-windows.md +++ b/docs/install_guides/installing-openvino-from-archive-windows.md @@ -1,112 +1,158 @@ # Install OpenVINO™ Runtime on Windows from an Archive File {#openvino_docs_install_guides_installing_openvino_from_archive_windows} -With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. +@sphinxdirective -Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. See the [Install OpenVINO from PyPI](installing-openvino-pip.md) page for instructions on how to install OpenVINO Runtime for Python using PyPI. +With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. -> **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via [pypi.org](https://pypi.org/project/openvino-dev/) only. +Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. See the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes) for more information on updates in the latest release. +.. note:: + + Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via `pypi.org <https://pypi.org/project/openvino-dev/>`__ only. + + +See the `Release Notes <https://software.intel.com/en-us/articles/OpenVINO-RelNotes>`__ for more information on updates in the latest release. + +System Requirements +#################### -@sphinxdirective .. tab:: System Requirements | Full requirement listing is available in: | `System Requirements Page `_ - + .. tab:: Processor Notes Processor graphics are not included in all processors. See `Product Specifications`_ for information about your processor. - - .. _Product Specifications: https://ark.intel.com/ + Processor graphics are not included in all processors. + See `Product Specifications`_ for information about your processor. + + .. _Product Specifications: https://ark.intel.com/ .. 
tab:: Software - * `Microsoft Visual Studio 2019 with MSBuild `_ or `Microsoft Visual Studio 2022 `_ - * `CMake 3.14 or higher, 64-bit `_ (optional, only required for building sample applications) - * `Python 3.7 - 3.10, 64-bit `_ + * `Microsoft Visual Studio 2019 with MSBuild `_ or `Microsoft Visual Studio 2022 `_ + * `CMake 3.14 or higher, 64-bit `_ (optional, only required for building sample applications) + * `Python 3.7 - 3.10, 64-bit `_ - .. note:: - To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `_. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. + .. note:: - .. note:: - You can either use `cmake.msi` which is the installation wizard or `cmake.zip` where you have to go into the `bin` folder and then manually add the path to environmental variables. - - .. important:: - When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `_ to your `PATH` environment variable. + To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `_. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. -@endsphinxdirective + .. note:: -## Installing OpenVINO Runtime + You can either use ``cmake.msi``, which is the installation wizard, or ``cmake.zip``, in which case you have to go into the ``bin`` folder and then manually add the path to the environment variables. -### Step 1: Download and Install OpenVINO Core Components + .. important:: + + When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `_ to your `PATH` environment variable. + + + +Installing OpenVINO Runtime +########################### + +.. _install-openvino-archive-windows: + +Step 1: Download and Install OpenVINO Core Components ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +1. Create an ``Intel`` folder in the ``C:\Program Files (x86)\`` directory. Skip this step if the folder already exists. -1. Create an `Intel` folder in the `C:\Program Files (x86)\` directory. Skip this step if the folder already exists. - You can also do this via command-lines. Open a new command prompt window as administrator by right-clicking **Command Prompt** from the Start menu and select **Run as administrator**, and then run the following command: - ```sh - mkdir "C:\Program Files (x86)\Intel" - ``` - > **NOTE**: `C:\Program Files (x86)\Intel` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. -2. Download the [OpenVINO Runtime archive file for Windows](https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/) to your local `Downloads` folder. - + .. code-block:: sh + + mkdir "C:\Program Files (x86)\Intel" + + + .. note:: + + ``C:\Program Files (x86)\Intel`` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. + + +2. Download the `OpenVINO Runtime archive file for Windows <https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/>`__ to your local ``Downloads`` folder. 
+ + If you prefer using command-lines, run the following commands in the command prompt window you opened: - ```sh - cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64.zip --output openvino_2022.3.0.zip - ``` - > **NOTE**: A `.sha256` file is provided together with the archive file to validate your download process. To do that, download the `.sha256` file from the same repository and run `CertUtil -hashfile openvino_2022.3.0.zip SHA256`. Compare the returned value in the output with what's in the `.sha256` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket [here](https://www.intel.com/content/www/us/en/support/contact-intel.html). + + .. code-block:: sh + + cd /Downloads + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64.zip --output openvino_2022.3.0.zip -3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the `C:\Program Files (x86)\Intel` directory. - + .. note:: + + A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2022.3.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here <https://www.intel.com/content/www/us/en/support/contact-intel.html>`__. + + +3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the ``C:\Program Files (x86)\Intel`` directory. + To do this step using command-lines, run the following commands in the command prompt window you opened: - ```sh - tar -xf openvino_2022.3.0.zip - ren w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64 openvino_2022.3.0 - move openvino_2022.3.0 "C:\Program Files (x86)\Intel" - ``` + + .. code-block:: sh + + tar -xf openvino_2022.3.0.zip + ren w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64 openvino_2022.3.0 + move openvino_2022.3.0 "C:\Program Files (x86)\Intel" + 4. For simplicity, it is useful to create a symbolic link. Open a command prompt window as administrator (see Step 1 for how to do this) and run the following commands: - ```sh - cd C:\Program Files (x86)\Intel - mklink /D openvino_2022 openvino_2022.3.0 - ``` - > **NOTE**: If you have already installed a previous release of OpenVINO 2022, a symbolic link to the `openvino_2022` folder may already exist. If you want to override it, nagivate to the `C:\Program Files (x86)\Intel` folder and delete the existing linked folder before running the `mklink` command. + .. code-block:: sh + + cd C:\Program Files (x86)\Intel + mklink /D openvino_2022 openvino_2022.3.0 -Congratulations, you finished the installation! The `C:\Program Files (x86)\Intel\openvino_2022` folder now contains the core components for OpenVINO. If you used a different path in Step 1, you will find the `openvino_2022` folder there. The path to the `openvino_2022` directory is also referred as `` throughout the OpenVINO documentation. -### Step 2: Configure the Environment + .. note:: -You must update several environment variables before you can compile and run OpenVINO™ applications. Open the Command Prompt, and run the `setupvars.bat` batch file to temporarily set your environment variables. 
 If your is not `C:\Program Files (x86)\Intel\openvino_2022`, use the correct directory instead. + If you have already installed a previous release of OpenVINO 2022, a symbolic link to the ``openvino_2022`` folder may already exist. If you want to override it, navigate to the ``C:\Program Files (x86)\Intel`` folder and delete the existing linked folder before running the ``mklink`` command. -```sh -"C:\Program Files (x86)\Intel\openvino_2022\setupvars.bat" -``` -> **Important**: The above command must be re-run every time a new Command Prompt window is opened. +Congratulations, you finished the installation! The ``C:\Program Files (x86)\Intel\openvino_2022`` folder now contains the core components for OpenVINO. If you used a different path in Step 1, you will find the ``openvino_2022`` folder there. The path to the ``openvino_2022`` directory is also referred to as ``<INSTALL_DIR>`` throughout the OpenVINO documentation. + +.. _set-the-environment-variables-windows: + +Step 2: Configure the Environment ++++++++++++++++++++++++++++++++++ + +You must update several environment variables before you can compile and run OpenVINO™ applications. Open the Command Prompt, and run the ``setupvars.bat`` batch file to temporarily set your environment variables. If your ``<INSTALL_DIR>`` is not ``C:\Program Files (x86)\Intel\openvino_2022``, use the correct directory instead. + +.. code-block:: sh + + "C:\Program Files (x86)\Intel\openvino_2022\setupvars.bat" + + +.. important:: + + The above command must be re-run every time a new Command Prompt window is opened. + + +.. note:: + + If you see an error indicating Python is not installed, Python may not be added to the PATH environment variable (as described `here <https://docs.python.org/3/using/windows.html#finding-the-python-executable>`__). Check your system environment variables, and add Python if necessary. -> **NOTE**: If you see an error indicating Python is not installed, Python may not be added to the PATH environment variable (as described [here](https://docs.python.org/3/using/windows.html#finding-the-python-executable)). Check your system environment variables, and add Python if necessary. The environment variables are set. Continue to the next section if you want to download any additional components. -### Step 3 (Optional): Install Additional Components +.. _model-optimizer-windows: + +Step 3 (Optional): Install Additional Components +++++++++++++++++++++++++++++++++++++++++++++++++ OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. If you install OpenVINO Runtime using archive files, OpenVINO Development Tools must be installed separately. -See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page for step-by-step installation instructions. +See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. -OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO). +OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the `instructions on GitHub <https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO>`__. +.. 
_optional-steps-windows: + +Step 4 (Optional): Configure Inference on non-CPU Devices ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -### Step 4 (Optional): Configure Inference on non-CPU Devices OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs and GNAs. See the instructions below to set up OpenVINO on these devices. -@sphinxdirective .. tab:: GPU To enable the toolkit components to use processor graphics (GPU) on your system, follow the steps in :ref:`GPU Setup Guide `. @@ -114,69 +160,67 @@ OpenVINO Runtime has a plugin architecture that enables you to run inference on .. tab:: GNA To enable the toolkit components to use Intel® Gaussian & Neural Accelerator (GNA) on your system, follow the steps in :ref:`GNA Setup Guide `. - -@endsphinxdirective -## What's Next? + +.. _get-started-windows: + +What's Next? +#################### + Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. -@sphinxdirective + .. tab:: Get started with Python - Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. - + Try the `Python Quick Start Example `__ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. + .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 Visit the :ref:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: - - * `OpenVINO Python API Tutorial `_ - * `Basic image classification program with Hello Image Classification `_ - * `Convert a PyTorch model and use it for image background removal `_ + + * `OpenVINO Python API Tutorial `__ + * `Basic image classification program with Hello Image Classification `__ + * `Convert a PyTorch model and use it for image background removal `__ .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. - + .. image:: https://user-images.githubusercontent.com/36741649/127170593-86976dc3-e5e4-40be-b0a6-206379cd7df5.jpg :width: 400 Visit the :ref:`Samples ` page for other C++ example applications to get you started with OpenVINO, such as: - + * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ -@endsphinxdirective - -## Uninstalling OpenVINO Runtime -To uninstall OpenVINO, follow the steps on the [Uninstalling page](uninstalling-openvino.md). +.. 
_uninstall-from-windows: + +Uninstalling OpenVINO Runtime +############################# + +To uninstall OpenVINO, follow the steps on the :doc:`Uninstalling page `. -Additional Resources -#################### +Additional Resources +#################### -@sphinxdirective +* `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__ * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer Developer Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ - - -.. _Intel® IoT Developer Kit: https://github.com/intel-iot-devkit +---> @endsphinxdirective - -## Additional Resources - -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-windows-header.md b/docs/install_guides/installing-openvino-windows-header.md index 2b0489fd384379..f5480a6a350f3a 100644 --- a/docs/install_guides/installing-openvino-windows-header.md +++ b/docs/install_guides/installing-openvino-windows-header.md @@ -10,12 +10,14 @@ From PyPI Using Docker -@endsphinxdirective -If you want to install OpenVINO™ Runtime on Windows, you have the following options: +If you want to install OpenVINO™ Runtime on Windows, you have the following options: + +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO from PyPI ` +* :doc:`Install OpenVINO with Docker ` -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-windows.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) -* [Install OpenVINO with Docker](installing-openvino-docker-windows.md) +For a full selection of distribution channels, +see the `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__ -For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) +@endsphinxdirective From 5fa95ff19d815af8cf6f2dbea7ace34865485cf6 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 10:12:13 +0100 Subject: [PATCH 051/296] DOCS shift to rst - Protecting Deep Learning Model (#16474) --- .../deployment_guide_introduction.md | 8 +- .../deployment/deployment_intro.md | 76 +++++++++------ .../images}/deployment_simplified.svg | 0 .../range_supervision}/img_combined_2.png | 0 .../images/range_supervision}/scheme3.svg | 0 .../quantization/range_supervision/README.md | 95 +++++++++++-------- 6 files changed, 106 insertions(+), 73 deletions(-) rename docs/{img => _static/images}/deployment_simplified.svg (100%) rename {tools/pot/docs/range_supervision/images => docs/_static/images/range_supervision}/img_combined_2.png (100%) rename {tools/pot/docs/range_supervision/images => docs/_static/images/range_supervision}/scheme3.svg (100%) diff --git a/docs/Documentation/deployment_guide_introduction.md b/docs/Documentation/deployment_guide_introduction.md index 4e3162d6de5383..6496a3cf494250 100644 --- a/docs/Documentation/deployment_guide_introduction.md +++ b/docs/Documentation/deployment_guide_introduction.md @@ -9,12 +9,9 @@ Run and Deploy Locally Deploy via Model Serving -@endsphinxdirective - Once you have a 
model that meets both OpenVINO™ and your requirements, you can choose how to deploy it with your application. -@sphinxdirective .. panels:: :doc:`Deploy via OpenVINO Runtime ` ^^^^^^^^^^^^ Local deployment uses OpenVINO Runtime that is called from, and linked to, the application directly. It utilizes resources available to the system and provides the quickest way of launching inference. :doc:`Deploy via Model Serving ` ^^^^^^^^^^^^ Deployment via OpenVINO Model Server allows the application to connect to the inference server set up remotely. This way inference can use external resources instead of those available to the application itself. -@endsphinxdirective - +Apart from the default deployment options, you may also :doc:`deploy your application for the TensorFlow framework with OpenVINO Integration `. -Apart from the default deployment options, you may also [deploy your application for the TensorFlow framework with OpenVINO Integration](./openvino_ecosystem_ovtf.md). +@endsphinxdirective \ No newline at end of file diff --git a/docs/OV_Runtime_UG/deployment/deployment_intro.md b/docs/OV_Runtime_UG/deployment/deployment_intro.md index df629a51e97574..fc9f4581c3792d 100644 --- a/docs/OV_Runtime_UG/deployment/deployment_intro.md +++ b/docs/OV_Runtime_UG/deployment/deployment_intro.md @@ -11,47 +11,69 @@ Deploy Application with Deployment Manager Local Distribution Libraries -@endsphinxdirective -> **NOTE**: Note that [running inference in OpenVINO Runtime](../openvino_intro.md) is the most basic form of deployment. Before moving forward, make sure you know how to create a proper Inference configuration and [develop your application properly](../integrate_with_your_application.md) +.. note:: + Note that :doc:`running inference in OpenVINO Runtime ` is the most basic form of deployment. Before moving forward, make sure you know how to create a proper Inference configuration and :doc:`develop your application properly `. -## Local Deployment Options +Local Deployment Options +######################## - Set a dependency on the existing prebuilt packages, also called "centralized distribution": - - using Debian / RPM packages - a recommended way for Linux operating systems; - - using PIP package manager on PyPI - the default approach for Python-based applications; - - using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to [Installing OpenVINO on Linux from Docker](../../install_guides/installing-openvino-docker-linux.md) and [Installing OpenVINO on Windows from Docker](../../install_guides/installing-openvino-docker-windows.md). -Furthermore, to customize your OpenVINO Docker image, use the [Docker CI Framework](https://github.com/openvinotoolkit/docker_ci) to generate a Dockerfile and built the image. + + - using Debian / RPM packages - a recommended way for Linux operating systems; + - using PIP package manager on PyPI - the default approach for Python-based applications; + - using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO on Linux from Docker ` and :doc:`Installing OpenVINO on Windows from Docker `. + +Furthermore, to customize your OpenVINO Docker image, use the `Docker CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ to generate a Dockerfile and build the image. 
+ + - Grab the necessary functionality of OpenVINO together with your application, also called "local distribution": - - using [OpenVINO Deployment Manager](deployment-manager-tool.md) - providing a convenient way for creating a distribution package; - - using the advanced [local distribution](local-distribution.md) approach; - - using [a static version of OpenVINO Runtime linked to the final app](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md). + + - using :doc:`OpenVINO Deployment Manager ` - providing a convenient way for creating a distribution package; + - using the advanced :doc:`local distribution ` approach; + - using `a static version of OpenVINO Runtime linked to the final app <https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md>`__. The table below shows which distribution type can be used for what target operating system: -| Distribution type | Operating systems | -|------- ---------- | ----------------- | -| Debian packages | Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit | -| RMP packages | Red Hat Enterprise Linux 8, 64-bit | -| Docker images | Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit | -| PyPI (PIP package manager) | See [https://pypi.org/project/openvino/](https://pypi.org/project/openvino/) | -| [OpenVINO Deployment Manager](deployment-manager-tool.md) | All operating systems | -| [Local distribution](local-distribution.md) | All operating systems | -| [Build OpenVINO statically and link to the final app](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md) | All operating systems | +.. list-table:: + :header-rows: 1 + + * - Distribution type + - Operating systems + * - Debian packages + - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit + * - RPM packages + - Red Hat Enterprise Linux 8, 64-bit + * - Docker images + - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit + * - PyPI (PIP package manager) + - See https://pypi.org/project/openvino/ + * - :doc:`OpenVINO Deployment Manager ` + - All operating systems + * - :doc:`Libraries for Local Distribution ` + - All operating systems + * - `Build OpenVINO statically and link to the final app <https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md>`__ + - All operating systems -## Granularity of Major Distribution Types -The granularity of OpenVINO packages may vary for different distribution types. For example, the PyPI distribution of OpenVINO has a [single 'openvino' package](https://pypi.org/project/openvino/) that contains all the runtime libraries and plugins, while a [local distribution](local-distribution.md) is a more configurable type providing higher granularity. Below are important details of the set of libraries included in the OpenVINO Runtime package: +Granularity of Major Distribution Types +####################################### + +The granularity of OpenVINO packages may vary for different distribution types. For example, the PyPI distribution of OpenVINO has a `single 'openvino' package <https://pypi.org/project/openvino/>`__ that contains all the runtime libraries and plugins, while a :doc:`local distribution ` is a more configurable type providing higher granularity. 
 Below are important details of the set of libraries included in the OpenVINO Runtime package: -![](../../img/deployment_simplified.svg) +.. image:: _static/images/deployment_simplified.svg + + +- The main library ``openvino`` is used by users' C++ applications to link against. The library provides all OpenVINO Runtime public APIs, including both API 2.0 and the previous Inference Engine and nGraph APIs. For C language applications, ``openvino_c`` is additionally required for distribution. +- The "optional" plugin libraries like ``openvino_intel_cpu_plugin`` (matching the ``openvino_.+_plugin`` pattern) are used to provide inference capabilities on specific devices or additional capabilities like :doc:`Hetero Execution ` and :doc:`Multi-Device Execution `. +- The "optional" plugin libraries like ``openvino_ir_frontend`` (matching ``openvino_.+_frontend``) are used to provide capabilities to read models of different file formats such as OpenVINO IR, TensorFlow, ONNX, and PaddlePaddle. Here the term "optional" means that if the application does not use the capability enabled by the plugin, the plugin library or a package with the plugin is not needed in the final distribution. Building a local distribution will require more detailed information, and you will find it in the dedicated :doc:`Libraries for Local Distribution ` article. +.. note:: + + Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: :doc:`Configurations for GPU `, :doc:`Configurations for GNA `. -> **NOTE**: Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: [Configurations for GPU](../../install_guides/configurations-for-intel-gpu.md), [Configurations for GNA](../../install_guides/configurations-for-intel-gna.md). 
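
As an illustration of this granularity, the following minimal C++ sketch (an added example for illustration only, not part of the original guide; the model path is a placeholder) links only against the main ``openvino`` library, while reading an OpenVINO IR model uses ``openvino_ir_frontend`` and compiling for "CPU" uses ``openvino_intel_cpu_plugin`` at runtime:

.. code-block:: cpp

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // Reading an IR file pulls in the openvino_ir_frontend library at runtime.
        auto model = core.read_model("model.xml");
        // Targeting "CPU" pulls in the openvino_intel_cpu_plugin library at runtime.
        auto compiled_model = core.compile_model(model, "CPU");
        return 0;
    }
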
+ +@endsphinxdirective \ No newline at end of file diff --git a/docs/img/deployment_simplified.svg b/docs/_static/images/deployment_simplified.svg similarity index 100% rename from docs/img/deployment_simplified.svg rename to docs/_static/images/deployment_simplified.svg diff --git a/tools/pot/docs/range_supervision/images/img_combined_2.png b/docs/_static/images/range_supervision/img_combined_2.png similarity index 100% rename from tools/pot/docs/range_supervision/images/img_combined_2.png rename to docs/_static/images/range_supervision/img_combined_2.png diff --git a/tools/pot/docs/range_supervision/images/scheme3.svg b/docs/_static/images/range_supervision/scheme3.svg similarity index 100% rename from tools/pot/docs/range_supervision/images/scheme3.svg rename to docs/_static/images/range_supervision/scheme3.svg diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md index 0385c31e4aba25..de7b16950544f8 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md @@ -1,70 +1,85 @@ # Experimental: Protecting Deep Learning Model through Range Supervision ("RangeSupervision") {#pot_ranger_README} -## Introduction +@sphinxdirective + +Introduction +#################### Deep neural networks find applications in many scenarios where the prediction is a critical component for safety-relevant decisions. Such workloads can benefit from additional protection against underlying errors. For example, memory bit flips (**"soft errors"** originating, e.g., from external radiation or internal electrical disturbances within the circuitry) in the platform hosting the network inference can corrupt the learned network parameters and lead to incorrect predictions. Typically, errors resulting in very large parameter values have a more drastic impact on the network behavior. **The range supervision algorithm ("RangeSupervision") described here establishes and inserts additional protection layers after already present activation layers**. Those layers truncate values that are found to be out of an expected activation range in order to mitigate the traces of potential platform errors. They do so during inference by applying a *clamp* operation to any activation *x* in the input to the RangeSupervision layer, - \f[ - x = clamp(x ; T_{low}, T_{up}) = min(max(x, T_{low}), T_{high}) - \f] - where \f$T_{low}\f$ and \f$T_{up}\f$ are the lower and upper bounds for the particular protection layer, respectively. -The process flow follows the diagram [Fig 1](#schematic-supervision). Starting from the internal representation (IR) of an OpenVINO model, the POT RangeSupervision algorithm is called to **add protection layers into the model graph**. This step requires **appropriate threshold values that are automatically extracted from a specified test dataset**. The result is an IR representation of the model with additional "RangeSupervision" layers after each supported activation layer. The original and the modified model can be called in the same way through the OpenVINO inference engine to evaluate the impact on accuracy, performance, and dependability in the presence of potential soft errors (for example using the *benchmark_app* and *accuracy_checker* functions). 
**The algorithm is designed to provide efficient protection at negligible performance overhead or accuracy impact in the absence of faults.** Bound extraction is a one-time effort and the protected IR model returned by the RangeSupervision algorithm can be used independently from there on. No changes in the learned parameters of the network are needed. + +.. math:: + + x = clamp(x ; T_{low}, T_{up}) = min(max(x, T_{low}), T_{up}) + +where :math:`T_{low}` and :math:`T_{up}` are the lower and upper bounds for the particular protection layer, respectively. +The process flow follows the diagram :ref:`Fig 1 <schematic-supervision>`. Starting from the internal representation (IR) of an OpenVINO model, the POT RangeSupervision algorithm is called to **add protection layers into the model graph**. This step requires **appropriate threshold values that are automatically extracted from a specified test dataset**. The result is an IR representation of the model with additional "RangeSupervision" layers after each supported activation layer. The original and the modified model can be called in the same way through the OpenVINO inference engine to evaluate the impact on accuracy, performance, and dependability in the presence of potential soft errors (for example using the *benchmark_app* and *accuracy_checker* functions). **The algorithm is designed to provide efficient protection at negligible performance overhead or accuracy impact in the absence of faults.** Bound extraction is a one-time effort and the protected IR model returned by the RangeSupervision algorithm can be used independently from there on. No changes in the learned parameters of the network are needed.

.. _schematic-supervision:

.. image:: _static/images/range_supervision/scheme3.svg
 :alt: Schematic

*Fig 1: Schematic of RangeSupervision process flow.*

Supported activation layers
+++++++++++++++++++++++++++

The following activation layers are currently supported for range supervision:

- ``ReLU``
- ``Swish``
- ``PReLU``
- ``Elu``
- ``Gelu``
- ``Sigmoid``
- ``Tanh``

This means that any activation layer of one of the above types that the model under consideration contains will be protected with an appropriate subsequent RangeSupervision layer.

Usage
####################

RangeSupervision protection can be used the same way as the :doc:`DefaultQuantization <pot_default_quantization_usage>` method.

Algorithm configuration
+++++++++++++++++++++++

The algorithm has a minimal configuration. Below is an example of such a configuration:

.. code-block:: json

   {
   "name": "RangeSupervision",
   "params": {
   "stat_subset_size": 300,
   "stat_batch_size": 1
   }
   }

The protected model will be saved in IR format in a new folder ``./results/\_RangeSupervision/...``. 
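
For reference, the protection each inserted layer applies is the elementwise clamp from the introduction. The following minimal C++ sketch (an illustration only, not part of POT itself; ``t_low`` and ``t_up`` stand for the bounds extracted from the calibration subset) shows the operation; the mandatory parameters of the configuration above follow.

.. code-block:: cpp

    #include <algorithm>

    // Clamp a single activation value into the expected range [t_low, t_up].
    float range_supervise(float x, float t_low, float t_up) {
        return std::min(std::max(x, t_low), t_up);
    }
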
Mandatory parameters: + +- ``"stat_subset_size"``: This parameter defines *how many images* of the specified dataset in "engine: config" are used to extract the bounds (images are randomly chosen if a subset is chosen). This value is set to **300** by default. The more images are selected for the bound generation, the more accurate the estimation of an out-of-bound event will be, at the cost of increasing extraction time. + +Example of RangeSupervision results +################################### + +The following example shows a traffic camera image and predicted objects using a YOLOv3 model pre-trained on the COCO dataset. A single weight fault was injected in a randomly chosen convolution layer of YOLO, flipping the most significant bit of the selected network parameter. If range supervision is applied, the original network performance is recovered despite the presence of the fault. + +.. image:: _static/images/range_supervision/img_combined_2.png + +*Fig 2: Example of fault mitigation via range supervision.* + +Additional Resources +#################### + +- Z. Chen, G. Li, and K. Pattabiraman, "A Low-cost Fault Corrector for Deep Neural Networks through Range Restriction", 2020. https://arxiv.org/abs/2003.13874 +- F. Geissler, Q. Syed, S. Roychowdhury, A. Asgari, Y. Peng, A. Dhamasia, R. Graefe, K. Pattabiraman, and M. Paulitsch, "Towards a Safety Case for Hardware Fault Tolerance in Convolutional Neural Networks Using Activation Range Supervision", 2021. 
https://arxiv.org/abs/2108.07019 From 087b10ff00f5f75b75acf842cc2a2e376212a6de Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Thu, 23 Mar 2023 09:16:04 +0000 Subject: [PATCH 052/296] Snippets: precision propagation (#14996) --- src/bindings/python/tests/__init__.py | 1 - .../python/tests/test_onnx/test_backend.py | 5 - .../python/tests_compatibility/__init__.py | 1 - .../test_onnx/test_backend.py | 5 - .../snippets/include/snippets/generator.hpp | 15 +- .../snippets/include/snippets/op/subgraph.hpp | 16 +- .../snippets/pass/align_element_type.hpp | 46 --- .../snippets/pass/fq_decomposition.hpp | 5 +- .../snippets/pass/propagate_precision.hpp | 48 +++ src/common/snippets/src/op/subgraph.cpp | 80 +++-- .../snippets/src/pass/align_element_type.cpp | 99 ------ .../snippets/src/pass/collapse_subgraph.cpp | 6 +- .../snippets/src/pass/fq_decomposition.cpp | 12 - .../snippets/src/pass/propagate_precision.cpp | 293 +++++++++++++++++ .../snippets/tests/include/lowering_utils.hpp | 6 +- .../include/pass/precision_propagation.hpp | 54 ++++ .../snippets/tests/src/lowering_utils.cpp | 14 +- .../tests/src/pass/precision_propagation.cpp | 294 ++++++++++++++++++ .../precision_propagation_convert_test.cpp | 153 +++++++++ .../precision_propagation_get_precisions.cpp | 45 +++ src/core/src/pass/visualize_tree.cpp | 4 +- .../intel_cpu/src/emitters/cpu_generator.cpp | 10 +- .../src/emitters/jit_dnnl_emitters.cpp | 4 + .../src/emitters/jit_dnnl_emitters.hpp | 2 + .../src/emitters/jit_eltwise_emitters.cpp | 204 +++++++++--- .../src/emitters/jit_eltwise_emitters.hpp | 66 ++-- .../intel_cpu/src/emitters/jit_emitter.cpp | 6 +- .../intel_cpu/src/emitters/jit_emitter.hpp | 8 +- .../src/emitters/jit_snippets_emitters.cpp | 15 +- .../src/emitters/jit_snippets_emitters.hpp | 9 + src/plugins/intel_cpu/src/nodes/eltwise.cpp | 61 ++-- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 29 +- .../remove_converts.cpp | 38 +++ .../remove_converts.hpp | 27 ++ .../snippets/check_broadcast.cpp | 81 +++++ .../precision_propagation_convertion.cpp | 37 +++ .../ngraph_transformations/mul_add_to_fma.cpp | 2 +- .../include/snippets/check_broadcast.hpp | 38 +++ .../precision_propagation_convertion.hpp | 33 ++ .../fuse_fake_quantize_transformation.cpp | 2 +- .../shared/src/snippets/check_broadcast.cpp | 89 ++++++ .../plugin/shared/src/snippets/convert.cpp | 4 +- .../precision_propagation_convertion.cpp | 48 +++ ...cision_propagation_convertion_function.hpp | 49 +++ .../precision_propagation_function.hpp | 131 ++++++++ .../include/snippets_helpers.hpp | 1 + ...cision_propagation_convertion_function.cpp | 92 ++++++ .../src/precision_propagation_function.cpp | 105 +++++++ 48 files changed, 2066 insertions(+), 327 deletions(-) delete mode 100644 src/common/snippets/include/snippets/pass/align_element_type.hpp create mode 100644 src/common/snippets/include/snippets/pass/propagate_precision.hpp delete mode 100644 src/common/snippets/src/pass/align_element_type.cpp create mode 100644 src/common/snippets/src/pass/propagate_precision.cpp create mode 100644 src/common/snippets/tests/include/pass/precision_propagation.hpp create mode 100644 src/common/snippets/tests/src/pass/precision_propagation.cpp create mode 100644 src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp create mode 100644 src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp create mode 100644 src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp create mode 100644 
src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp create mode 100644 src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp create mode 100644 src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp create mode 100644 src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp create mode 100644 src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp diff --git a/src/bindings/python/tests/__init__.py b/src/bindings/python/tests/__init__.py index 06d8dfb043480f..a426ce8424ec71 100644 --- a/src/bindings/python/tests/__init__.py +++ b/src/bindings/python/tests/__init__.py @@ -117,7 +117,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding") -xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations") xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.") skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.") diff --git a/src/bindings/python/tests/test_onnx/test_backend.py b/src/bindings/python/tests/test_onnx/test_backend.py index c681f376348142..dc30a9bda3806b 100644 --- a/src/bindings/python/tests/test_onnx/test_backend.py +++ b/src/bindings/python/tests/test_onnx/test_backend.py @@ -37,7 +37,6 @@ xfail_issue_58033, xfail_issue_63033, xfail_issue_63036, - xfail_issue_63039, xfail_issue_63043, xfail_issue_63137, xfail_issue_63138, @@ -278,10 +277,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu", ), (xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"), - ( - xfail_issue_63039, - "OnnxBackendNodeModelTest.test_div_uint8_cpu", - ), ( xfail_issue_63043, "OnnxBackendNodeModelTest.test_gru_batchwise_cpu", diff --git a/src/bindings/python/tests_compatibility/__init__.py b/src/bindings/python/tests_compatibility/__init__.py index 7b5d7217cd8ed1..24d2050a3a9d77 100644 --- a/src/bindings/python/tests_compatibility/__init__.py +++ b/src/bindings/python/tests_compatibility/__init__.py @@ -122,7 +122,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding") -xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations") xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.") skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.") diff --git 
a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py index 89b7afcb47e4af..53ec35731cbc5f 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py @@ -37,7 +37,6 @@ xfail_issue_58033, xfail_issue_63033, xfail_issue_63036, - xfail_issue_63039, xfail_issue_63043, xfail_issue_63137, xfail_issue_63138, @@ -282,10 +281,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu", ), (xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"), - ( - xfail_issue_63039, - "OnnxBackendNodeModelTest.test_div_uint8_cpu", - ), ( xfail_issue_63043, "OnnxBackendNodeModelTest.test_gru_batchwise_cpu", diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index ab3156a108e3e1..939b4f4d43c33d 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -16,6 +16,8 @@ namespace snippets { auto getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo; +typedef std::pair(const std::shared_ptr&)>, + std::function>(const std::shared_ptr&)>> jitters_value; /** * @interface TargetMachine * @brief Base class Target machine representation. Target derives from this class to provide generator information about supported emitters @@ -51,7 +53,16 @@ class TargetMachine { if (jitter == jitters.end()) { throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation."); } - return jitter->second; + return jitter->second.first; + } + + std::function>(const std::shared_ptr&)> + get_supported_precisions(const ngraph::DiscreteTypeInfo type) const { + auto jitter = jitters.find(type); + if (jitter == jitters.end()) { + throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation."); + } + return jitter->second.second; } /** @@ -64,7 +75,7 @@ class TargetMachine { virtual ~TargetMachine() = default; protected: - std::map(std::shared_ptr)>> jitters; + std::map jitters; }; /** diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index ec55f076301c64..46e6633f61b8aa 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -101,11 +101,17 @@ class Subgraph : public ov::op::util::SubGraphOp { bool is_quantized() const { return config.m_is_quantized; } bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; } bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; } - - snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt, + snippets::Schedule generate(const BlockedShapeVector& output_shapes, + const BlockedShapeVector& input_shapes, + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, const void* compile_params = nullptr); snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr); - snippets::Schedule generate(ngraph::pass::Manager &opt, const void* compile_params = nullptr); + snippets::Schedule generate(ngraph::pass::Manager& pre_dialect, + 
ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, + const void* compile_params = nullptr); snippets::Schedule generate(const void* compile_params = nullptr); ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); std::vector reshape_body(const std::vector& input_shapes); @@ -132,6 +138,8 @@ class Subgraph : public ov::op::util::SubGraphOp { // This check returns True if Constant op which is input of this op should be inside Subgraph body static auto constant_input_should_be_inside_body(const std::shared_ptr& node) -> bool; + static bool check_broadcast(const std::shared_ptr& node) noexcept; + private: void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes); void convert_to_snippet_dialect(); @@ -164,8 +172,6 @@ class Subgraph : public ov::op::util::SubGraphOp { public: // True if Subgraph contains FakeQuantize -> FQ decomposition should be called bool m_is_quantized = false; - // True if we should align element types indise body - bool m_is_needed_to_align_precision = false; // True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy body using mutexes // because TypeRelaxed::copy_with_new_inputs() isn't save-thread method bool m_has_type_relaxed_ops = false; diff --git a/src/common/snippets/include/snippets/pass/align_element_type.hpp b/src/common/snippets/include/snippets/pass/align_element_type.hpp deleted file mode 100644 index 0b1f831091c4cc..00000000000000 --- a/src/common/snippets/include/snippets/pass/align_element_type.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -namespace ngraph { -namespace snippets { -namespace pass { - -/** - * @interface AlignElementType - * @brief Wrap sequence of operations which doesn't support execution on original element type by ConvertSaturation - * and reset element type for type relaxed nodes inside body to align element type between nodes. - * Example 1: - * - After FQ decomposition there may be Convert[U8/I8]. If after the Convert there are other operations - * that don't support U8/I8, new ConvertSaturation[exec_type] will be inserted after the FQ decomposition - * to execute these operations on supported element type - * Example 2: - * - Input[I8] -> Unsupported I8 op -> Movement op -> Output[I8]. There will be inserted two ConvertSaturation: - * * ConvertSatiration[exec_type] before op which is unsupported I8 - * * ConvertSaturation[I8] before Movement op to return original low precision. 
- * Note: We cannot just remove original Convert[I8/U8] in Example 1 because we should cover two things: - * * allow execution of operations on supported element type for them - * * keep computations mathematically equivalent to the original function - * Thus, for these cases we should have the following pipeline: FP32 -> Convert[I8/U8] -> Convert[FP32] -> FP32 - * Note: We shouldn't call validate_and_infer_type() after Convert insertions to avoid element type conflicts on inputs of ops - * @ingroup snippets - */ -class AlignElementType: public ngraph::pass::FunctionPass { -public: - OPENVINO_RTTI("AlignElementType", "0"); - AlignElementType(const ov::element::Type exec_type = ov::element::f32); - bool run_on_model(const std::shared_ptr& m) override; - - static bool opNeedsAlignElementType(const std::shared_ptr& n, const ov::element::Type exec_type = ov::element::f32); -private: - ov::element::Type exec_type; -}; - -} // namespace pass -} // namespace snippets -} // namespace ngraph diff --git a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp index 284640d8c18122..cfb9ff41955867 100644 --- a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp +++ b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp @@ -29,7 +29,7 @@ namespace pass { * * Expand brackets: * round(x * (levels-1) / (ih - il) - il * (levels-1) / (ih - il)) * (oh - ol) / (levels-1) + ol - * + * * Marking: * - isc := (levels-1) / (ih - il) * - ish := -il * isc @@ -37,7 +37,7 @@ namespace pass { * - osh := ol * Final expression: * round(x * isc + ish) * osc + osh - * + * * Some optimizations (example for scalars): * 1. If output element type of FQ is U8 and il = 0, ish = 0, osc = 1, osh = 0, there is enough expression: x * isc * 2. If output element type of FQ is I8 and ish ~= 128, osc = 1, osh ~= -128, il * isc ~= -128, ih * isc ~= 127 there is enough expression: x * isc @@ -54,7 +54,6 @@ class FakeQuantizeDecomposition : public ngraph::pass::MatcherPass { public: FakeQuantizeDecomposition(); - static bool isAllScalarConstant(const std::shared_ptr& node); static bool getScalesAndShifts(const std::shared_ptr& fq_node, std::vector& cl, std::vector& ch, diff --git a/src/common/snippets/include/snippets/pass/propagate_precision.hpp b/src/common/snippets/include/snippets/pass/propagate_precision.hpp new file mode 100644 index 00000000000000..d0920766f632fd --- /dev/null +++ b/src/common/snippets/include/snippets/pass/propagate_precision.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "snippets/generator.hpp" + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @class PropagatePrecision + * @ingroup snippets + * @brief PropagatePrecision transformation propagates precision from parameters to results.
+ */ +class PropagatePrecision: public ngraph::pass::FunctionPass { +public: + OPENVINO_RTTI("PropagatePrecision", "0"); + PropagatePrecision(const std::shared_ptr& target_machine); + bool run_on_model(const std::shared_ptr& m) override; + + static std::vector get_precisions( + const std::vector& input_precisions, + const std::set>& supported_precisions) noexcept; + + // if can_be_removed returns true then the actual conversion (actual_before => actual_after) + // can be replaced with the required one (actual_before => required_after) + static bool can_be_removed( + const element::Type& actual_before, + const element::Type& actual_after, + const element::Type& required_after) noexcept; + + // if can_be_fused returns true then the actual conversion can be replaced with the required one + static bool can_be_fused( + const element::Type& actual, + const element::Type& required) noexcept; + +private: + const std::shared_ptr target_machine; +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 07f13ae8defb57..20b6edb17b9d14 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -11,6 +11,7 @@ #include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" #include "snippets/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/pass/propagate_precision.hpp" #include "snippets/pass/assign_registers.hpp" #include "snippets/pass/convert_constants.hpp" #include "snippets/pass/convert_power_to_powerstatic.hpp" @@ -18,7 +19,6 @@ #include "snippets/pass/insert_loops.hpp" #include "snippets/pass/transpose_decomposition.hpp" #include "snippets/pass/transform_convert.hpp" -#include "snippets/pass/align_element_type.hpp" #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/softmax_decomposition.hpp" @@ -62,10 +62,6 @@ void snippets::op::Subgraph::init_config() { ov::is_type(op); config.m_has_type_relaxed_ops = config.m_has_type_relaxed_ops || std::dynamic_pointer_cast(op); - config.m_is_needed_to_align_precision = config.m_is_needed_to_align_precision || - is_quantized() || - has_type_relaxed_ops() || - snippets::pass::AlignElementType::opNeedsAlignElementType(op, execution_element_type); config.m_has_domain_sensitive_ops = config.m_has_domain_sensitive_ops || ov::is_type(op) || ov::is_type(op) || @@ -359,6 +355,14 @@ ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& return master_shape; } +bool snippets::op::Subgraph::check_broadcast(const std::shared_ptr& node) noexcept { + const auto elementwise = std::dynamic_pointer_cast(node); + return + (elementwise == nullptr) || + (elementwise->get_input_partial_shape(0).size() == elementwise->get_input_partial_shape(1).size()) || + (elementwise->get_autob().m_type != ov::op::AutoBroadcastType::PDPD); +} + void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes) { // We should insert Convert before Results to set original output element type if needed @@ -369,35 +373,34 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu const auto convert = std::make_shared( body_results[i]->get_input_node_shared_ptr(0), needed_out_type); body_results[i]->set_argument(0, convert); + body_results[i]->validate_and_infer_types(); } } // We should change existing element type to original for
Parameters if needed - const auto& body_parameters = body_ptr()->get_parameters(); + const auto& parameters = body_ptr()->get_parameters(); for (size_t i = 0; i < inputShapes.size(); ++i) { const auto needed_in_type = std::get<2>(inputShapes[i]); - if (body_parameters[i]->get_element_type() != needed_in_type) { - body_parameters[i]->set_element_type(needed_in_type); - config.m_is_needed_to_align_precision = true; - } - } + const auto& parameter = parameters[i]; + if (parameter->get_element_type() != needed_in_type) { + const auto parameter_output = parameter->output(0); + const auto convert = std::make_shared( + parameter_output, + parameter_output.get_element_type()); + ngraph::copy_runtime_info(parameter, convert); + + for (const auto input : parameter_output.get_target_inputs()) { + const auto& input_node = input.get_node(); + if (input_node == convert.get()) { + continue; + } + input_node->set_argument(input.get_index(), convert->output(0)); + } - // We should align element type inside body using the corresponding pass: - // - Insert Convert before operations that doesn't support original element type for execution - // - Insert reverse Convert before operations that support original element type - // but have inputs that doesn't support it (because before them will be inserted Convert with exec_type - first point) - // - Then we should use ConstantFolding pass to convert element type of Scalars before inference. - // - Eliminate redundant Converts which can be inserted in AlignElementType() pass - ngraph::pass::Manager manager; - if (config.m_is_needed_to_align_precision) { - manager.register_pass(execution_element_type); - manager.register_pass(); - // TODO [100041] : In some cases AlignElementType pass can insert extra Convert because - // the pass doesn't know real precisions in real time. 
- // We call EliminateConverts pass to remove them - manager.register_pass(); + parameter->set_element_type(needed_in_type); + parameter->validate_and_infer_types(); + } } - manager.run_passes(body_ptr()); } void snippets::op::Subgraph::initialize_buffer_scratchpad_size() { @@ -602,24 +605,39 @@ snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& ou snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, - ngraph::pass::Manager& opt, + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, const void* compile_params) { canonicalize(output_shapes, input_shapes); - return generate(opt, compile_params); + return generate(pre_dialect, post_dialect, post_precision, compile_params); } snippets::Schedule snippets::op::Subgraph::generate(const void* compile_params) { auto mngr = ngraph::pass::Manager(); - return generate(mngr, compile_params); + return generate(mngr, mngr, mngr, compile_params); } -snippets::Schedule snippets::op::Subgraph::generate(ngraph::pass::Manager& opt, const void* compile_params) { +snippets::Schedule snippets::op::Subgraph::generate( + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, + const void* compile_params) { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::generate") NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set"); + pre_dialect.run_passes(body_ptr()); convert_to_snippet_dialect(); - opt.run_passes(body_ptr()); + post_dialect.run_passes(body_ptr()); + + ngraph::pass::Manager precision_manager; + precision_manager.register_pass(m_generator->get_target_machine()); + precision_manager.register_pass(); + precision_manager.register_pass(); + precision_manager.run_passes(body_ptr()); + + post_precision.run_passes(body_ptr()); // After all passes, when all optimizations are completed and all MemoryAccess ops are inserted, // we can calculate common buffer scratchpad size and propagate offset from Buffer to the corresponding MemoryAccess ops diff --git a/src/common/snippets/src/pass/align_element_type.cpp b/src/common/snippets/src/pass/align_element_type.cpp deleted file mode 100644 index abd50a9e44605c..00000000000000 --- a/src/common/snippets/src/pass/align_element_type.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "snippets/snippets_isa.hpp" -#include "snippets/op/convert_saturation.hpp" -#include "snippets/pass/align_element_type.hpp" -#include "snippets/utils.hpp" -#include "ov_ops/type_relaxed.hpp" -#include "ngraph/op/util/op_types.hpp" - -#include - -namespace { - -inline auto is_in_op(const std::shared_ptr& n) -> bool { - return ov::is_type(n) - || ov::is_type(n); -} - -// At the moment Subgraph supports only Eltwise, Select, Convert, Broadcast and FQ (which is decomposed into Eltwises and Convert) with -// Softmax (which is decomposed into Eltwises as well) -// And only Eltwise and Select ops supports execution only in "exec_type". 
So we can check op type from the opposite -// NOTE: This check is only for executable which isn't Parameter/Constant/Result -inline auto op_supports_only_exec_type(const std::shared_ptr& n) -> bool { - return !is_in_op(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n); -} - -} // namespace - -ngraph::snippets::pass::AlignElementType::AlignElementType(const ov::element::Type exec_type) : exec_type(exec_type) { } - -bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_ptr &m) { - RUN_ON_FUNCTION_SCOPE(AlignElementType); - - auto insertConvert = [](const std::shared_ptr& op, const size_t idx, const ov::element::Type& element_type) -> void { - auto convert = std::make_shared(op->input(idx).get_source_output(), element_type); - ngraph::copy_runtime_info(op->get_input_node_shared_ptr(idx), convert); - op->set_argument(idx, convert); - }; - - // NOTE: We don't call validate_and_infer_types() to avoid precision conflicts on inputs - bool rewritten = false; - auto ops = m->get_ordered_ops(); - for (auto& op : ops) { - if (is_in_op(op)) { - continue; - } - - if (op_supports_only_exec_type(op)) { - for (size_t i = 0; i < op->inputs().size(); i++) { - auto shared_input = op->get_input_node_shared_ptr(i); - auto existing_convert = ov::as_type_ptr(shared_input); - // We should insert Convert before Ops, which supports only exec element type, only when: - // - Input is Convert with unsupported destination type - // - Input is Op which support any element type - // We couldn't unite these conditions and just check that element type isn't supported exec type - // because we don't call validate_and_infer_types() so we don't know new precisions after setting of original - // input and output element types - if ((existing_convert && existing_convert->get_destination_type() != exec_type) || - (!op_supports_only_exec_type(shared_input))) { - insertConvert(op, i, exec_type); - rewritten |= true; - } - } - if (auto tr_node = std::dynamic_pointer_cast(op)) { - tr_node->set_overridden_output_type(exec_type, 0); - rewritten |= true; - } - } else { // branch for Movement ops, MatMul ops in the future and for the Convert, Result - for (size_t i = 0; i < op->inputs().size(); i++) { - auto shared_input = op->get_input_node_shared_ptr(i); - // it's original element type because we don't use validate_and_infer_type() anywhere - const auto original_eltype = op->input(i).get_element_type(); - // If before op there is another op that doesn't support execution on original element type, we know that - // before this op will be inserted reverse Convert to support execution on supported element type (first branch of condition). 
- // So we should return original element type for operations that can support low precision - if (op_supports_only_exec_type(shared_input) && original_eltype != exec_type) { - insertConvert(op, i, original_eltype); - rewritten |= true; - } - } - } - } - - return rewritten; -} - -bool ngraph::snippets::pass::AlignElementType::opNeedsAlignElementType(const std::shared_ptr& op, const ov::element::Type exec_type) { - // At the moment Snippets support only Eltwise/Convert/FQ/Select/Softmax/Broadcast which one output so we can just call get_element_type() - return op_supports_only_exec_type(op) && op->get_element_type() != exec_type; -} diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index cd3eb887481031..3325881834fd88 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -212,7 +212,11 @@ const std::set ngraph::snippets::pass::TokenizeSnippets:: { ngraph::element::f32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 }; bool TokenizeSnippets::AppropriateForSubgraph(const std::shared_ptr &node) { - return is_supported_op(node) && has_supported_in_out(node) && node->get_control_dependencies().empty(); + return + is_supported_op(node) && + has_supported_in_out(node) && + node->get_control_dependencies().empty() && + snippets::op::Subgraph::check_broadcast(node); } TokenizeSnippets::TokenizeSnippets() { diff --git a/src/common/snippets/src/pass/fq_decomposition.cpp b/src/common/snippets/src/pass/fq_decomposition.cpp index 5c2cfd6b0f82c3..9688e0a0e22940 100644 --- a/src/common/snippets/src/pass/fq_decomposition.cpp +++ b/src/common/snippets/src/pass/fq_decomposition.cpp @@ -36,11 +36,6 @@ bool isValidRangesInputs(const std::shared_ptr& fq }); } -bool is_scalar_constant(const std::shared_ptr& source_output_node) { - return ngraph::is_type(source_output_node) && - ngraph::shape_size(source_output_node->get_shape()) == 1; -} - } // namespace ngraph::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { @@ -182,13 +177,6 @@ ngraph::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { register_matcher(m, callback); } -bool ngraph::snippets::pass::FakeQuantizeDecomposition::isAllScalarConstant(const std::shared_ptr& node) { - return is_scalar_constant(node->get_input_node_shared_ptr(1)) && - is_scalar_constant(node->get_input_node_shared_ptr(2)) && - is_scalar_constant(node->get_input_node_shared_ptr(3)) && - is_scalar_constant(node->get_input_node_shared_ptr(4)); -} - bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts( const std::shared_ptr& fq_node, std::vector& cl, diff --git a/src/common/snippets/src/pass/propagate_precision.cpp b/src/common/snippets/src/pass/propagate_precision.cpp new file mode 100644 index 00000000000000..19be34b4e97648 --- /dev/null +++ b/src/common/snippets/src/pass/propagate_precision.cpp @@ -0,0 +1,293 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/propagate_precision.hpp" + +#include +#include +#include "ov_ops/type_relaxed.hpp" +#include "snippets/itt.hpp" +#include "ngraph/rt_info.hpp" + +using namespace ngraph; + +ngraph::snippets::pass::PropagatePrecision::PropagatePrecision( + const std::shared_ptr& target_machine) : target_machine(target_machine) { +} + +bool ngraph::snippets::pass::PropagatePrecision::run_on_model(const std::shared_ptr& f) { + RUN_ON_MODEL_SCOPE(PropagatePrecision); + 
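+    // Flow of the pass, matching the code below:
+    //   1. Remember the element type each Result expects so it can be restored at the end.
+    //   2. For every op in topological order, query the target machine for the input
+    //      precision combinations its emitter supports.
+    //   3. Drop ConvertSaturation ops on inputs that became redundant, then reuse, fuse
+    //      or insert ConvertSaturation ops until the op receives a supported combination.
+    //   4. Re-validate the op; if its output precision changed, convert back to the
+    //      original precision for the op's consumers.
+    //   5. Restore the Result element types remembered in step 1.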
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::PropagatePrecision") + + std::unordered_map, element::Type> result_types; + auto results = f->get_results(); + for (auto& result : results) { + result_types.emplace(result, result->get_input_element_type(0)); + } + + bool was_updated = true; + for (const auto& op : f->get_ordered_ops()) { + auto type_info = op->get_type_info(); + OPENVINO_ASSERT( + target_machine->has(type_info), + "operation '" + std::string(type_info.version_id) + "::" + std::string(type_info.name) + "' was not found in target machine"); + + auto exec = target_machine->get_supported_precisions(type_info); + const auto supported_precisions = exec(op); + if (supported_precisions.empty()) { + continue; + } + + // There are two operation types that break precision propagation: + // 1) Existing conversion operations. Solution: remove the conversion + // operation before the general algorithm runs + // 2) TypeRelaxed-based operations. Will be resolved by the snippets opset. + + auto input_precisions_were_changed = false; + + for (const auto& input : op->inputs()) { + const auto convert = ngraph::as_type(input.get_source_output().get_node()); + if (convert == nullptr) { + continue; + } + + const auto precision_before = convert->get_input_element_type(0); + const auto precision_after = convert->get_output_element_type(0); + if (can_be_removed(precision_before, precision_after, precision_before)) { + op->set_argument(input.get_index(), convert->input(0).get_source_output()); + input_precisions_were_changed = true; + } + } + + std::vector input_precisions; + for (const auto& input : op->inputs()) { + const auto input_precision = input.get_source_output().get_element_type(); + input_precisions.push_back(input_precision); + } + + assert(std::all_of( + supported_precisions.begin(), + supported_precisions.end(), + [&input_precisions](const std::vector& precisions) { + return precisions.size() == input_precisions.size(); + }) && "each supported precisions tuple must have as many elements as the operation has inputs"); + + // update input precisions: + // if possible, convert precisions to a supported combination + if (!supported_precisions.empty() && + std::all_of( + supported_precisions.begin(), + supported_precisions.end(), + [&input_precisions](const std::vector& precisions) { + return precisions != input_precisions; + })) { + auto precisions = get_precisions(input_precisions, + supported_precisions); + OPENVINO_ASSERT( + !precisions.empty(), + "there are no supported precisions for operation '" + std::string(type_info.version_id) + "::" + std::string(type_info.name) + "'"); + + auto find_convert = []( + const ngraph::Output parent_output, + const ngraph::element::Type convert_type) -> snippets::op::ConvertSaturation* { + for (const auto& input : parent_output.get_target_inputs()) { + const auto child = ngraph::as_type(input.get_node()); + if ((child != nullptr) && (child->get_output_element_type(0) == convert_type)) { + return child; + } + } + return nullptr; + }; + + for (size_t i = 0; i < op->get_input_size(); ++i) { + const auto& op_input = op->input(i); + const auto& required_after = precisions[i]; + auto parent_output = op_input.get_source_output(); + const auto actual_before = parent_output.get_element_type(); + if (actual_before != required_after) { + was_updated = true; + input_precisions_were_changed = true; + auto existing_convert = ngraph::as_type( + parent_output.get_node()); + + if (existing_convert == nullptr) { + existing_convert = find_convert(parent_output, required_after); + if
(existing_convert != nullptr) { + // reuse existing convert + op->set_argument(op_input.get_index(), existing_convert->shared_from_this()); + continue; + } + } + + if (existing_convert == nullptr) { + // create new Convert + auto convert = std::make_shared( + parent_output, + required_after); + ngraph::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + op->set_argument(op_input.get_index(), convert); + continue; + } + + const auto actual_before = existing_convert->get_input_element_type(0); + const auto actual_after = existing_convert->get_output_element_type(0); + + if (can_be_removed(actual_before, actual_after, required_after)) { + // remove existing convert + existing_convert->output(0).replace(parent_output); + continue; + } + + if (can_be_fused(actual_after, required_after)) { + // fuse existing convert + auto convert = std::make_shared( + existing_convert->get_input_node_shared_ptr(0), + required_after); + ngraph::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + op->set_argument(op_input.get_index(), convert); + continue; + } + + // create new convert + auto convert = std::make_shared( + existing_convert->output(0), + required_after); + ngraph::copy_runtime_info(existing_convert->output(0).get_node()->shared_from_this(), convert); + op->set_argument(op_input.get_index(), convert); + } + } + } + + auto type_relaxed_node = std::dynamic_pointer_cast(op); + if (input_precisions_were_changed || (type_relaxed_node != nullptr)) { + // update output precision + std::vector op_output_types; + for (auto& output : op->outputs()) { + op_output_types.push_back(output.get_element_type()); + } + + if (type_relaxed_node != nullptr) { + // TODO: user story 104284 + // to keep the previous functionality, only + // unary and binary element-wise operations are supported; + // this will be replaced by the snippets opset later + const auto op_element_type = op->get_input_element_type(0); + if (type_relaxed_node->get_overridden_output_type(0) != op_element_type) { + was_updated = true; + OPENVINO_ASSERT(op->get_output_size() == 1ull, "operations with several outputs are not supported"); + + type_relaxed_node->set_overridden_output_type(op_element_type, 0); + op->validate_and_infer_types(); + } + } else { + op->validate_and_infer_types(); + } + + for (size_t i = 0; i < op->get_output_size(); ++i) { + auto output = op->output(i); + + if (output.get_element_type() != op_output_types[i]) { + was_updated = true; + auto convert = std::make_shared( + output, + op_output_types[i]); + ngraph::copy_runtime_info(output.get_node_shared_ptr(), convert); + + for (auto& input : output.get_target_inputs()) { + auto child = input.get_node(); + if (child == convert.get()) { + continue; + } + + input.replace_source_output(convert->output(0)); + + + if (ngraph::is_type(input.get_node())) { + input.get_tensor_ptr()->add_names(output.get_tensor_ptr()->get_names()); + + const std::string original_name = op->get_friendly_name(); + op->set_friendly_name(original_name + "_original"); + convert->set_friendly_name(original_name); + } + } + output.get_tensor_ptr()->set_names({}); + } + } + } + } + + for (auto it = result_types.begin(); it != result_types.end(); ++it) { + const auto result = it->first; + const auto actual_type = result->get_input_element_type(0); + const auto expected_type = it->second; + if (actual_type != it->second) { + was_updated = true; + auto convert = std::make_shared( + result->get_input_node_shared_ptr(0), + expected_type); + ngraph::copy_runtime_info(result->get_input_node_shared_ptr(0),
convert); + result->set_argument(0, convert); + } + } + + return was_updated; +} + +bool ngraph::snippets::pass::PropagatePrecision::can_be_removed( + const element::Type& actual_before, + const element::Type& actual_after, + const element::Type& required_after) noexcept { + if (actual_before != required_after) { + return false; + } + + return can_be_fused(actual_after, actual_before); +} + +bool ngraph::snippets::pass::PropagatePrecision::can_be_fused( + const element::Type& actual, + const element::Type& required) noexcept { + // custom conditions: between int & float precisions + if (((actual == element::bf16) || (actual == element::f16) || (actual == element::f32)) && + ((required == element::u8) || (required == element::i8))) { + return true; + } + + if ((actual == element::f32) && ((required == element::u16) || (required == element::i16))) { + return true; + } + + // general conditions: must hold for any newly added precision + return + (actual.is_real() == required.is_real()) && + (actual.bitwidth() >= required.bitwidth()); +} + +std::vector ngraph::snippets::pass::PropagatePrecision::get_precisions( + const std::vector& input_precisions, + const std::set>& supported_precisions_pack) noexcept { + bool was_found = false; + for (const auto& supported_precisions : supported_precisions_pack) { + for (size_t i = 0; i < supported_precisions.size(); ++i) { + const auto& supported_precision = supported_precisions[i]; + const auto& input_precision = input_precisions[i]; + if ((supported_precision.is_real() != input_precision.is_real()) || + (input_precision.bitwidth() > supported_precision.bitwidth())) { + was_found = false; + break; + } + + was_found = true; + } + if (was_found) { + return supported_precisions; + } + } + + if (!supported_precisions_pack.empty()) { + return *supported_precisions_pack.begin(); + } + + return {}; +} diff --git a/src/common/snippets/tests/include/lowering_utils.hpp b/src/common/snippets/tests/include/lowering_utils.hpp index be2e0f2e756044..b0b1bafb245308 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -16,7 +16,7 @@ using BlockedShapeVector = ngraph::snippets::op::Subgraph::BlockedShapeVector; class DummyEmitter : public ngraph::snippets::Emitter { public: // Here I pass Add to Emitter, but could be any other op, since it's ignored anyway.
- DummyEmitter() : ngraph::snippets::Emitter(std::make_shared()) {} + DummyEmitter(const std::vector& custom_opset = {}) : ngraph::snippets::Emitter(std::make_shared()) {} void emit_code(const std::vector&, const std::vector&, const std::vector&, @@ -49,7 +49,9 @@ class LoweringTests : public TransformationTestsF { static std::shared_ptr getSubgraph(const std::shared_ptr& f); static std::shared_ptr getLoweredSubgraph(const std::shared_ptr& f, const ov::PartialShape& master_shape, - ov::pass::Manager target_optimizations = {}, + ov::pass::Manager pre_dialect = {}, + ov::pass::Manager post_dialect = {}, + ov::pass::Manager post_precision = {}, const std::shared_ptr generator = nullptr); static std::shared_ptr getTokenizedSubgraph(const std::shared_ptr& f); ov::PartialShape master_shape{}; diff --git a/src/common/snippets/tests/include/pass/precision_propagation.hpp b/src/common/snippets/tests/include/pass/precision_propagation.hpp new file mode 100644 index 00000000000000..a60b9161ab4fc4 --- /dev/null +++ b/src/common/snippets/tests/include/pass/precision_propagation.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "lowering_utils.hpp" +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class PrecisionPropagationParamsValues { +public: + class Actual { + public: + std::pair convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair convertion_before_op2_2; + std::set> op1_supported_precisions; + std::set> op2_supported_precisions; + }; + + class Expected { + public: + std::pair convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair convertion_before_op2_2; + element::Type convertion_after_op2; + }; + + std::vector input_types; + Actual actual; + Expected expected; +}; + +typedef std::tuple< + std::pair, // input shapes + PrecisionPropagationParamsValues +> PrecisionPropagationParams; + +class PrecisionPropagationTest : public TransformationTestsF, + public testing::WithParamInterface { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + std::shared_ptr snippets_function; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index a536a0317eae12..55480e95dae510 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -11,10 +11,12 @@ namespace ov { namespace test { namespace snippets { -DummyTargetMachine::DummyTargetMachine(const std::vector& custom_opset) { - auto dummy_functor = [](const std::shared_ptr& n) { - return std::make_shared(); +DummyTargetMachine::DummyTargetMachine(const std::vector&custom_opset) { + auto dummy_functor = ngraph::snippets::jitters_value { + [](const std::shared_ptr& n) { return std::make_shared(); }, + [](const std::shared_ptr& n) { return std::set>{};} }; + jitters[op::v0::Parameter::get_type_info_static()] = dummy_functor; jitters[op::v0::Constant::get_type_info_static()] = dummy_functor; jitters[op::v0::Result::get_type_info_static()] = dummy_functor; @@ -97,7 +99,9 @@ std::shared_ptr LoweringTests::getSubgraph(const std::shared_ptr LoweringTests::getLoweredSubgraph(const std::shared_ptr &f, const ov::PartialShape& master_shape, - ov::pass::Manager target_optimizations, + ov::pass::Manager pre_dialect, + ov::pass::Manager post_dialect, + ov::pass::Manager 
post_precision, const std::shared_ptr generator) { auto subgraph = getTokenizedSubgraph(f); subgraph->set_generator(generator == nullptr ? std::make_shared() : generator); @@ -119,7 +123,7 @@ std::shared_ptr LoweringTests::getLoweredSubgrap } body_rt_info["PluginShapesOverride"] = new_shapes; subgraph->set_tile_rank(2); - subgraph->generate(target_optimizations); + subgraph->generate(pre_dialect, post_dialect, post_precision); return subgraph; } diff --git a/src/common/snippets/tests/src/pass/precision_propagation.cpp b/src/common/snippets/tests/src/pass/precision_propagation.cpp new file mode 100644 index 00000000000000..3c7da4d06aa165 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation.cpp @@ -0,0 +1,294 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "pass/precision_propagation.hpp" + +#include +#include "ngraph/pass/validate.hpp" +#include "snippets/pass/propagate_precision.hpp" +#include "snippets/op/convert_saturation.hpp" +#include "common_test_utils/common_utils.hpp" +#include "precision_propagation_function.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +class DummyPrecisionPropagationTargetMachine : public DummyTargetMachine { +public: + DummyPrecisionPropagationTargetMachine( + const std::set>& op1_supported_precisions, + const std::set>& op2_supported_precisions) + : DummyTargetMachine() { + jitters[DummyAdd::get_type_info_static()] = ngraph::snippets::jitters_value { + [](const std::shared_ptr& n) { return std::make_shared(); }, + [op1_supported_precisions](const std::shared_ptr& n) { return op1_supported_precisions; }}; + jitters[op::v1::Maximum::get_type_info_static()] = ngraph::snippets::jitters_value{ + [](const std::shared_ptr& n) { return std::make_shared(); }, + [op2_supported_precisions](const std::shared_ptr&n) { return op2_supported_precisions; }}; + + auto default_jitter = ngraph::snippets::jitters_value{ + [](const std::shared_ptr& n) { return std::make_shared(); }, + [](const std::shared_ptr& n) { return std::set>{};} }; + jitters[ngraph::snippets::op::ConvertSaturation::get_type_info_static()] = default_jitter; + } +}; + +} // namespace + +std::string PrecisionPropagationTest::getTestCaseName(testing::TestParamInfo obj) { + std::pair shapes; + PrecisionPropagationParamsValues test_values; + std::tie(shapes, test_values) = obj.param; + + auto to_string = [](const std::set>& precisions_pack) noexcept { + std::ostringstream result; + result << "{"; + for (const auto& precisions : precisions_pack) { + result << CommonTestUtils::vec2str(precisions) << "_"; + } + result << "}"; + return result.str(); + }; + + std::ostringstream result; + result << "IN0_" << shapes.first << "_" << test_values.input_types[0] << "_" + << "IN1_" << shapes.second << "_" << test_values.input_types[1] << "_" + << "IN2_" << test_values.input_types[2] + << to_string(test_values.actual.op1_supported_precisions) << "_" + << to_string(test_values.actual.op2_supported_precisions) << "_" + << test_values.expected.convertion_before_op1.first << "_" << test_values.expected.convertion_before_op1.second << "_" + << test_values.expected.convertion_before_op2_1 << "_" + << test_values.expected.convertion_before_op2_2.first << "_" << test_values.expected.convertion_before_op2_2.second << "_" + << test_values.expected.convertion_after_op2 << "_"; + return result.str(); +} + +TEST_P(PrecisionPropagationTest, CompareFunctions) { + disable_rt_info_check(); + + const auto param = GetParam(); + const
auto shapes = std::get<0>(param); + const auto test_values = std::get<1>(param); + + const auto input_shapes = std::vector({ shapes.first, shapes.second }); + PrecisionPropagationAddFunction function_stub( + input_shapes, + test_values.input_types[0], + test_values.input_types[1], + test_values.input_types[2], + { + test_values.actual.convertion_before_op1, + test_values.actual.convertion_before_op2_1, + test_values.actual.convertion_before_op2_2 + }, + { + test_values.expected.convertion_before_op1, + test_values.expected.convertion_before_op2_1, + test_values.expected.convertion_before_op2_2, + test_values.expected.convertion_after_op2 + }); + function = function_stub.getOriginal(); + + const auto target_machine = std::make_shared( + test_values.actual.op1_supported_precisions, + test_values.actual.op2_supported_precisions); + + manager.register_pass(target_machine); + + function_ref = function_stub.getReference(); +} + +namespace PrecisionPropagationTestInstantiation { +// clang-format off + +std::vector> shapes { + {{1, 3, 16, 16}, {1, 3, 16, 16}} +}; + +std::vector test_cases { + { + {element::f32, element::f32, element::f32}, + { + {}, + {}, + {}, + {{element::f32, element::f32}}, + {{element::f32, element::f32}} + }, + {} + }, + // in: Parameter I8 => Op1 I32 => Convert I8 => Op1 I8 => Result + // out: Parameter I8 => Add I32 => Convert I8 => Convert FP32 => Op1 FP32 => Result + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::f32, element::f32}} + }, + { + {}, + element::i8, + {element::f32, element::f32}, + {element::i8} + } + }, + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::i8, element::i8}} + }, + { + {}, + {}, + {element::i8, element::undefined}, + {} + } + }, + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::i32, element::i32}} + }, + { + {}, + {element::i8}, + {element::i32, element::i32}, + {element::i8} + } + }, + { + {element::bf16, element::bf16, element::f32}, + { + {element::f32, element::f32}, + {}, + {}, + { + {element::f32, element::f32}, + {element::i8, element::i8} + }, + { + {element::f32, element::f32}, + {element::i32, element::i32} + } + }, + { + {element::f32, element::f32}, + {}, + {}, + {} + } + }, + // propagate precision via operation #1 + { + {element::bf16, element::bf16, element::f32}, + { + {element::f32, element::f32}, + {}, + {}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32} + } + }, + { + {}, + {}, + {element::f32, element::undefined}, + {} + } + }, + // propagate precision via operation #1 + { + {element::bf16, element::bf16, element::bf16}, + { + {element::f32, element::f32}, + {}, + {element::undefined, element::f32}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32} + } + }, + { + {}, + {}, + {element::f32, element::f32}, + {} + } + }, + // propagate precision via both operations + { + {element::bf16, element::bf16, element::bf16}, + { + {element::f32, element::f32}, + {}, + {element::undefined, element::f32}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + } + }, + { + {}, + {}, + {}, + {element::f32} + } + }, + { + {element::bf16, element::bf16, element::bf16}, + { + {}, + {}, + {}, + {{element::f32, element::f32}}, + {{element::f32, 
element::f32}} + }, + { + {{element::f32}, {element::f32}}, + {element::bf16}, + {{element::f32}, {element::f32}}, + {element::bf16} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_PrecisionPropagationTest, + PrecisionPropagationTest, + ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(test_cases)), + PrecisionPropagationTest::getTestCaseName); + +// clang-format on +} // namespace PrecisionPropagationTestInstantiation + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp b/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp new file mode 100644 index 00000000000000..cc6c113cc3f671 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "snippets/pass/propagate_precision.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class PrecisionPropagationConvertTest : public testing::Test {}; + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_be_fused) { + const std::set> precisions_set = { + {element::u64, element::u64}, + {element::u64, element::u32}, + {element::u64, element::u16}, + {element::u64, element::u8}, + {element::u32, element::u32}, + {element::u32, element::u16}, + {element::u32, element::u8}, + {element::u16, element::u16}, + {element::u16, element::u8}, + {element::u8, element::u8}, + + {element::i64, element::i64}, + {element::i64, element::i32}, + {element::i64, element::i16}, + {element::i64, element::i8}, + {element::i32, element::i32}, + {element::i32, element::i16}, + {element::i32, element::i8}, + {element::i16, element::i16}, + {element::i16, element::i8}, + {element::i8, element::i8}, + + {element::f64, element::f64}, + {element::f64, element::f32}, + {element::f64, element::f16}, + {element::f32, element::f32}, + {element::f32, element::f16}, + {element::f16, element::f16}, + + {element::f32, element::bf16}, + {element::bf16, element::bf16}, + {element::f32, element::i8}, + {element::f16, element::i8}, + {element::bf16, element::i8}, + {element::f32, element::u8}, + {element::f16, element::u8}, + {element::bf16, element::u8} + }; + + for (const auto& precisions : precisions_set) { + ASSERT_TRUE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.first, + precisions.second)) << precisions.second << " can replace " << precisions.first; + + if (precisions.first == precisions.second) { + continue; + } + + ASSERT_FALSE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.second, + precisions.first)) << precisions.second << " can not replace " << precisions.first; + } +} + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_not_be_fused) { + const std::set> precisions_set = { + {element::i64, element::f32}, + {element::i64, element::f16}, + {element::i64, element::bf16}, + + {element::i32, element::f32}, + {element::i32, element::f16}, + {element::i32, element::bf16}, + + {element::i16, element::f16}, + {element::i16, element::bf16}, + + {element::u64, element::f32}, + {element::u64, element::f16}, + {element::u64, element::bf16}, + + {element::u32, element::f32}, + {element::u32, element::f16}, + {element::u32, element::bf16}, + + {element::u16, element::f16}, + {element::u16, element::bf16} + }; + + for (const auto& precisions : precisions_set) { + 
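+        // These integer -> floating-point pairs must be rejected: can_be_fused accepts
+        // only same-kind conversions that do not widen the precision, plus the explicit
+        // float -> int cases listed in its implementation.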
ASSERT_FALSE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.first, + precisions.second)) << precisions.second << " can not replace " << precisions.first; + } +} + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_be_removed) { + const std::set> precisions_set = { + {element::u64, element::u64, element::u64}, + {element::u32, element::u64, element::u32}, + {element::u16, element::u64, element::u16}, + {element::u8, element::u64, element::u8}, + {element::u32, element::u32, element::u32}, + {element::u16, element::u32, element::u16}, + {element::u8, element::u32, element::u8}, + {element::u16, element::u16, element::u16}, + {element::u8, element::u16, element::u8}, + {element::u8, element::u8, element::u8}, + + {element::i64, element::i64, element::i64}, + {element::i32, element::i64, element::i32}, + {element::i16, element::i64, element::i16}, + {element::i8, element::i64, element::i8}, + {element::i32, element::i32, element::i32}, + {element::i16, element::i32, element::i16}, + {element::i8, element::i32, element::i8}, + {element::i16, element::i16, element::i16}, + {element::i8, element::i16, element::i8}, + {element::i8, element::i8, element::i8}, + + {element::f64, element::f64, element::f64}, + {element::f32, element::f64, element::f32}, + {element::f16, element::f64, element::f16}, + {element::f32, element::f32, element::f32}, + {element::f16, element::f16, element::f16}, + + {element::bf16, element::f32, element::bf16}, + {element::bf16, element::bf16, element::bf16}, + }; + + for (const auto& precisions : precisions_set) { + const auto actual_before = std::get<0>(precisions); + const auto actual_after = std::get<1>(precisions); + const auto required_after = std::get<2>(precisions); + ASSERT_TRUE(ngraph::snippets::pass::PropagatePrecision::can_be_removed( + actual_before, + actual_after, + required_after)) << "can_be_removed: " << actual_before << " => " << actual_after << " => " << required_after; + + if ((actual_before == actual_after) && (actual_before == required_after)) { + continue; + } + } +} + +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp b/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp new file mode 100644 index 00000000000000..9e97fcc8ad4aa1 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "snippets/pass/propagate_precision.hpp" + +namespace ov { +namespace test { +namespace snippets { + + +class PrecisionPropagationGetPrecisionsTest : public testing::Test {}; + +TEST_F(PrecisionPropagationGetPrecisionsTest, empty) { + ASSERT_EQ(std::vector{}, ngraph::snippets::pass::PropagatePrecision::get_precisions({}, {})); +} + +TEST_F(PrecisionPropagationGetPrecisionsTest, selected) { + ASSERT_EQ( + std::vector({element::f32, element::f32}), + ngraph::snippets::pass::PropagatePrecision::get_precisions( + { element::f32, element::f32 }, + { + {element::bf16, element::bf16}, + {element::f32, element::f32}, + {element::i8, element::i8}, + })); +} + +TEST_F(PrecisionPropagationGetPrecisionsTest, first) { + ASSERT_EQ( + std::vector({ element::bf16, element::bf16 }), + ngraph::snippets::pass::PropagatePrecision::get_precisions( + { element::i32, element::i32 }, + { + {element::bf16, element::bf16}, + 
{element::f32, element::f32}, + {element::i8, element::i8}, + })); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp index 70ee298b547e5e..c89decb3f42121 100644 --- a/src/core/src/pass/visualize_tree.cpp +++ b/src/core/src/pass/visualize_tree.cpp @@ -503,7 +503,9 @@ string pass::VisualizeTree::get_node_name(shared_ptr node) { if (node->get_friendly_name() != node->get_name()) { rc += "\\n" + (nvtmn ? string("name: ") : "") + node->get_name(); } - rc += "\\n" + (nvtmn ? string("type_name: ") : "") + std::string(node->get_type_name()); + const auto type_info = node->get_type_info(); + rc += "\\n" + (nvtmn ? string("type_name: ") : "") + std::string(type_info.version_id) + + "::" + std::string(type_info.name); static const bool nvttn = getenv_bool("OV_VISUALIZE_TREE_TENSORS_NAME"); if (nvttn) { diff --git a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp index 8423a9bec9d611..8c2e666d6b6438 100644 --- a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp @@ -26,8 +26,14 @@ using namespace std; using namespace ngraph::snippets; -#define CREATE_EMITTER(e_type) [this](const std::shared_ptr& n) \ - -> std::shared_ptr {return std::make_shared(h.get(), isa, n);}; +#define CREATE_EMITTER(e_type) { \ + [this](const std::shared_ptr& n) -> std::shared_ptr { \ + return std::make_shared(h.get(), isa, n); \ + }, \ + [](const std::shared_ptr& n) -> std::set> { \ + return e_type::get_supported_precisions(n); \ + } \ +}; class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { public: diff --git a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp index 501cd934753b10..416218b92a3bb6 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp @@ -13,6 +13,10 @@ using namespace Xbyak; namespace ov { namespace intel_cpu { +std::set> jit_dnnl_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + jit_dnnl_emitter::jit_dnnl_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr& node, InferenceEngine::Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) { diff --git a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp index b9ea5ffd2339da..0b7165d2484580 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp @@ -20,6 +20,8 @@ class jit_dnnl_emitter : public jit_emitter { void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs) const override {}; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + protected: jit_dnnl_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, dnnl_alg_kind_t algKind, float inpAlpha, float inpBeta, diff --git a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp index d222f8345511dc..150d524ac04ce7 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp @@ -3,6 +3,7 @@ // #include "jit_eltwise_emitters.hpp" +#include "ie_ngraph_utils.hpp" using namespace InferenceEngine; 
using namespace dnnl::impl::utils; @@ -16,9 +17,26 @@ using namespace Xbyak; namespace ov { namespace intel_cpu { +namespace { +InferenceEngine::Precision get_arithmetic_binary_exec_precision(const std::shared_ptr& n) { + std::vector input_precisions; + for (const auto& input : n->inputs()) { + input_precisions.push_back( + InferenceEngine::details::convertPrecision(input.get_source_output().get_element_type())); + } + + assert(std::all_of( + input_precisions.begin(), + input_precisions.end(), + [&input_precisions](const InferenceEngine::Precision& precision) {return precision == input_precisions[0]; })); + + return input_precisions[0]; +} +} // namespace + /// ADD /// -jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -59,13 +77,13 @@ void jit_add_emitter::emit_isa(const std::vector &in_vec_idxs, const std } } -std::set jit_add_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_add_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// MUL_ADD /// -jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -150,13 +168,13 @@ size_t jit_mul_add_emitter::aux_vecs_count() const { return 1; } -std::set jit_mul_add_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_mul_add_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32, element::f32}, {element::i32, element::i32, element::i32}}; } /// SUB /// -jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -197,13 +215,13 @@ void jit_subtract_emitter::emit_isa(const std::vector &in_vec_idxs, cons } } -std::set jit_subtract_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_subtract_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// MULTIPLY /// -jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t 
host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -244,13 +262,13 @@ void jit_multiply_emitter::emit_isa(const std::vector &in_vec_idxs, cons } } -std::set jit_multiply_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_multiply_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// DIVIDE /// jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -305,8 +323,8 @@ void jit_divide_emitter::emit_isa(const std::vector &in_vec_idxs, const } } -std::set jit_divide_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_divide_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } size_t jit_divide_emitter::aux_vecs_count() const { @@ -321,7 +339,11 @@ jit_floor_emitter::jit_floor_emitter(x64::jit_generator *host, x64::cpu_isa_t ho size_t jit_floor_emitter::get_inputs_num() const { return 1; } -void jit_floor_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_floor_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_floor_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -344,11 +366,15 @@ void jit_floor_emitter::emit_isa(const std::vector &in_vec_idxs, const s /// CEILING /// jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {} -jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, Precision exec_prc) +jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} size_t jit_ceiling_emitter::get_inputs_num() const { return 1; } +std::set> jit_ceiling_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + void jit_ceiling_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { @@ -378,7 +404,11 @@ jit_floor_mod_emitter::jit_floor_mod_emitter(x64::jit_generator *host, x64::cpu_ size_t jit_floor_mod_emitter::get_inputs_num() const { return 2; } -void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr& 
node) { + return {{element::f32, element::f32}}; +} + +void jit_floor_mod_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -428,7 +458,11 @@ jit_mod_emitter::jit_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_i size_t jit_mod_emitter::get_inputs_num() const { return 2; } -void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_mod_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -471,8 +505,8 @@ size_t jit_mod_emitter::aux_vecs_count() const { } /// MAXIMUM /// -jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -514,13 +548,13 @@ void jit_maximum_emitter::emit_isa(const std::vector &in_vec_idxs, const } } -std::set jit_maximum_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_maximum_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// MINIMUM /// -jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -562,8 +596,8 @@ void jit_minimum_emitter::emit_isa(const std::vector &in_vec_idxs, const } } -std::set jit_minimum_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_minimum_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// SQUARED_DIFFERENCE /// @@ -617,8 +651,8 @@ void jit_squared_difference_emitter::emit_isa(const std::vector &in_vec_ } } -std::set jit_squared_difference_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_squared_difference_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// POWER_DYNAMIC /// @@ -630,7 +664,11 @@ jit_power_dynamic_emitter::jit_power_dynamic_emitter(x64::jit_generator *host, x size_t jit_power_dynamic_emitter::get_inputs_num() const { return 2; } -void jit_power_dynamic_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> 
jit_power_dynamic_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_power_dynamic_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -741,7 +779,11 @@ jit_equal_emitter::jit_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t ho size_t jit_equal_emitter::get_inputs_num() const { return 2; } -void jit_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -800,7 +842,11 @@ jit_not_equal_emitter::jit_not_equal_emitter(x64::jit_generator *host, x64::cpu_ size_t jit_not_equal_emitter::get_inputs_num() const { return 2; } -void jit_not_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_not_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_not_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -859,7 +905,11 @@ jit_greater_emitter::jit_greater_emitter(x64::jit_generator *host, x64::cpu_isa_ size_t jit_greater_emitter::get_inputs_num() const { return 2; } -void jit_greater_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_greater_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_greater_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -919,7 +969,11 @@ jit_greater_equal_emitter::jit_greater_equal_emitter(x64::jit_generator *host, x size_t jit_greater_equal_emitter::get_inputs_num() const { return 2; } -void jit_greater_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_greater_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_greater_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -978,7 +1032,11 @@ jit_less_emitter::jit_less_emitter(x64::jit_generator *host, x64::cpu_isa_t host size_t jit_less_emitter::get_inputs_num() const { return 2; } -void jit_less_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_less_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_less_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1037,7 +1095,11 @@ 
jit_less_equal_emitter::jit_less_equal_emitter(x64::jit_generator *host, x64::cp size_t jit_less_equal_emitter::get_inputs_num() const { return 2; } -void jit_less_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_less_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_less_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1097,7 +1159,11 @@ jit_logical_and_emitter::jit_logical_and_emitter(x64::jit_generator *host, x64:: size_t jit_logical_and_emitter::get_inputs_num() const { return 2; } -void jit_logical_and_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_and_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_logical_and_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1177,7 +1243,11 @@ jit_logical_or_emitter::jit_logical_or_emitter(x64::jit_generator *host, x64::cp size_t jit_logical_or_emitter::get_inputs_num() const { return 2; } -void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_or_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_logical_or_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1256,7 +1326,11 @@ jit_logical_xor_emitter::jit_logical_xor_emitter(x64::jit_generator *host, x64:: size_t jit_logical_xor_emitter::get_inputs_num() const { return 2; } -void jit_logical_xor_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_xor_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_logical_xor_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1335,7 +1409,11 @@ jit_logical_not_emitter::jit_logical_not_emitter(x64::jit_generator *host, x64:: size_t jit_logical_not_emitter::get_inputs_num() const { return 1; } -void jit_logical_not_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_not_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_logical_not_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1405,7 +1483,11 @@ jit_power_static_emitter::jit_power_static_emitter(x64::jit_generator *host, x64 size_t jit_power_static_emitter::get_inputs_num() const { return 1; } -void jit_power_static_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_power_static_emitter::get_supported_precisions(const std::shared_ptr& node) 
{ + return {{element::f32}}; +} + +void jit_power_static_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1579,7 +1661,11 @@ jit_prelu_emitter::jit_prelu_emitter(x64::jit_generator *host, x64::cpu_isa_t ho } size_t jit_prelu_emitter::get_inputs_num() const { return 2; } -void jit_prelu_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_prelu_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_prelu_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1634,7 +1720,11 @@ jit_sqrt_emitter::jit_sqrt_emitter(x64::jit_generator *host, x64::cpu_isa_t host size_t jit_sqrt_emitter::get_inputs_num() const { return 1; } -void jit_sqrt_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_sqrt_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_sqrt_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1661,7 +1751,11 @@ jit_negative_emitter::jit_negative_emitter(x64::jit_generator *host, x64::cpu_is size_t jit_negative_emitter::get_inputs_num() const { return 1; } -void jit_negative_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_negative_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_negative_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1695,6 +1789,10 @@ jit_erf_emitter::jit_erf_emitter(x64::jit_generator *host, x64::cpu_isa_t host_i size_t jit_erf_emitter::get_inputs_num() const { return 1; } +std::set> jit_erf_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + void jit_erf_emitter::emit_impl( const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { @@ -1875,7 +1973,11 @@ jit_soft_sign_emitter::jit_soft_sign_emitter(x64::jit_generator *host, x64::cpu_ size_t jit_soft_sign_emitter::get_inputs_num() const { return 1; } -void jit_soft_sign_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_soft_sign_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_soft_sign_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -2086,6 +2188,10 @@ jit_select_emitter::jit_select_emitter(x64::jit_generator *host, x64::cpu_isa_t size_t jit_select_emitter::get_inputs_num() const { return 3; } +std::set> jit_select_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32, element::f32}}; +} + size_t jit_select_emitter::aux_vecs_count() const { if (host_isa_ == x64::avx512_core) return 0; diff --git 
a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp index 138ba513eda71a..5c00e4584b4274 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp @@ -13,11 +13,10 @@ class jit_add_emitter : public jit_emitter { public: jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -30,11 +29,10 @@ class jit_mul_add_emitter : public jit_emitter { public: jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -50,11 +48,10 @@ class jit_subtract_emitter : public jit_emitter { public: jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -68,11 +65,10 @@ class jit_multiply_emitter : public jit_emitter { public: jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static 
std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -90,7 +86,7 @@ class jit_divide_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -108,6 +104,7 @@ class jit_floor_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -124,6 +121,7 @@ class jit_ceiling_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -140,6 +138,7 @@ class jit_floor_mod_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -158,6 +157,7 @@ class jit_mod_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -172,11 +172,10 @@ class jit_maximum_emitter : public jit_emitter { public: jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -190,11 +189,10 @@ class jit_minimum_emitter : public jit_emitter { public: jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, 
dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -213,7 +211,7 @@ class jit_squared_difference_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -231,6 +229,7 @@ class jit_power_dynamic_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -248,6 +247,7 @@ class jit_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -268,6 +268,7 @@ class jit_not_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -288,6 +289,7 @@ class jit_greater_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -308,6 +310,7 @@ class jit_greater_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -328,6 +331,7 @@ class jit_less_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -349,6 +353,7 @@ class jit_less_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -369,6 +374,7 @@ class jit_logical_and_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() 
const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -389,6 +395,7 @@ class jit_logical_or_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -409,6 +416,7 @@ class jit_logical_xor_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -428,6 +436,7 @@ class jit_logical_not_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -448,6 +457,8 @@ class jit_power_static_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -471,6 +482,7 @@ class jit_prelu_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -489,6 +501,7 @@ class jit_sqrt_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -503,6 +516,7 @@ class jit_negative_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector& in, const std::vector& out) const override; @@ -520,6 +534,7 @@ class jit_erf_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl( @@ -541,6 +556,7 @@ class jit_soft_sign_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -563,6 +579,9 @@ class jit_is_finite_emitter : public jit_emitter { } size_t get_inputs_num() 
const override { return 1; };
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) {
+ return {{element::f32}};
+ }
 protected: size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
@@ -588,6 +607,9 @@ class jit_is_inf_emitter : public jit_emitter { } size_t get_inputs_num() const override { return 1; };
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) {
+ return {{element::f32}};
+ }
 protected: size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
@@ -615,6 +637,9 @@ class jit_is_nan_emitter : public jit_emitter { } size_t get_inputs_num() const override { return 1; }
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) {
+ return {{element::f32}};
+ }
 protected: size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
@@ -635,6 +660,7 @@ class jit_select_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override;
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr);
 size_t aux_vecs_count() const override; private:
diff --git a/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp b/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp index 3bbd03935563f0..7d9ab0d0994315 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp
@@ -3,8 +3,8 @@ // #include "jit_emitter.hpp"
-#include "utils/general_utils.h"
 #include
+#include "utils/general_utils.h"
 using namespace dnnl::impl::cpu; using namespace dnnl::impl;
@@ -55,8 +55,8 @@ size_t jit_emitter::aux_gprs_count() const { return entry_map_.empty() ? 0 : 1; }
-std::set jit_emitter::get_supported_precisions() {
- return {InferenceEngine::Precision::FP32};
+std::set> jit_emitter::get_supported_precisions(const std::shared_ptr& node) {
+ return {};
 }
 void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs,
diff --git a/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp b/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp index be548c614e0aa2..eb3309de32d8c5 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp
@@ -49,7 +49,13 @@ class jit_emitter : public ngraph::snippets::Emitter { virtual size_t get_inputs_num() const = 0; virtual size_t aux_vecs_count() const; emitter_in_out_map get_in_out_type() const;
- static std::set get_supported_precisions();
+
+ /**
+ * @brief Returns the supported precisions.
+ * The precisions are ordered: the first precision of the same type with a larger bitness is selected.
+ * Empty collection means the emitter supports any input precisions.
+ */ + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); protected: virtual size_t aux_gprs_count() const; diff --git a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp index af583e804b157f..4f63dd641f6295 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp @@ -479,7 +479,20 @@ void BroadcastMoveEmitter::emit_isa(const std::vector &in, const std::ve ScalarEmitter::ScalarEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n) : jit_emitter(h, isa, n) { - value = dnnl::impl::cpu::x64::float2int(ov::as_type_ptr(n)->cast_vector()[0]); + const auto precision = n->get_output_element_type(0); + switch (precision) { + case element::i32: { + value = ov::as_type_ptr(n)->cast_vector()[0]; + break; + } + case element::f32: { + value = dnnl::impl::cpu::x64::float2int(ov::as_type_ptr(n)->cast_vector()[0]); + break; + } + default: { + IE_THROW() << "Scalar emitter doesn't support " << precision; + } + } push_arg_entry_of("scalar", value, true); prepare_table(); } diff --git a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp index caeab227ad4b44..cae08b3fe43ac8 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp @@ -322,6 +322,9 @@ class BrgemmEmitter : public jit_emitter { BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n); size_t get_inputs_num() const override {return 2;} + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) { + return {{element::f32, element::f32}}; + } private: void emit_impl(const std::vector& in, @@ -369,6 +372,9 @@ class HorizonMaxEmitter : public jit_emitter { HorizonMaxEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n); size_t get_inputs_num() const override {return 1;} + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) { + return {{element::f32}}; + } protected: size_t aux_gprs_count() const override {return 1;} @@ -387,6 +393,9 @@ class HorizonSumEmitter : public jit_emitter { HorizonSumEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n); size_t get_inputs_num() const override {return 1;} + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) { + return {{element::f32}}; + } protected: size_t aux_gprs_count() const override {return 1;} diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 4ef400ae601a2f..5bc46c00b40b7e 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -9,6 +9,7 @@ #include "cpu_types.h" #include "utils/bfloat16.hpp" +#include "ie_ngraph_utils.hpp" #include #include @@ -58,7 +59,7 @@ namespace { template struct SupportedPrecisions { - void operator()(std::set &precisions) { + void operator()(std::set> &precisions) { precisions = T::get_supported_precisions(); } }; @@ -105,7 +106,7 @@ struct EltwiseEmitter { /** * Implements Eltwise shape inference algorithm. The algorithm is based on broadcasting all the input shapes * according to the NUMPY broadcast rule. 
This implementation is more lightweight than the ngraph one. - * + */
 class EltwiseShapeInfer : public ShapeInferEmptyPads { public:
@@ -176,10 +177,31 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener void generate() override { Precision exec_prc = Precision::UNSPECIFIED;
- std::set supported_precision_intersection = get_supported_precisions(eltwise_data_.front().algo);
+ std::set> supported_precision_intersection = get_supported_precisions(eltwise_data_.front().algo);
+
+ // for element-wise operations all inputs must have the same precision
+ assert(std::all_of(
+ supported_precision_intersection.begin(),
+ supported_precision_intersection.end(),
+ [&supported_precision_intersection](const std::vector& precisions) {
+ return std::all_of(
+ precisions.begin(),
+ precisions.end(),
+ [&precisions](const element::Type precision) { return precision == precisions[0]; });
+ }));
+
 for (size_t i = 1; i < eltwise_data_.size(); ++i) {
- std::set prcs = get_supported_precisions(eltwise_data_[i].algo);
- std::set prcs_intersect = {};
+ std::set> prcs = get_supported_precisions(eltwise_data_[i].algo);
+ std::set> prcs_intersect = {};
+
+ // to support previous functionality
+ if (!std::all_of(
+ prcs.begin(),
+ prcs.end(),
+ [&supported_precision_intersection](const std::vector& types) {
+ return types.size() == supported_precision_intersection.size(); })) {
+ continue;
+ }
 std::set_intersection(supported_precision_intersection.begin(), supported_precision_intersection.end(), prcs.begin(), prcs.end(), std::inserter(prcs_intersect, prcs_intersect.begin()));
@@ -187,19 +209,22 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener supported_precision_intersection = prcs_intersect; }
- static const Precision exec_precisions_priority[] = {
- Precision::U8,
- Precision::I8,
- Precision::U16,
- Precision::I16,
- Precision::BF16,
- Precision::I32,
- Precision::FP32
+ static const element::Type exec_precisions_priority[] = {
+ element::u8,
+ element::i8,
+ element::u16,
+ element::i16,
+ element::bf16,
+ element::i32,
+ element::f32
 };
- for (auto prc : exec_precisions_priority) {
- if (std::find(supported_precision_intersection.begin(), supported_precision_intersection.end(), prc) != supported_precision_intersection.end()) {
- exec_prc = prc;
+ for (const auto prc : exec_precisions_priority) {
+ if (std::any_of(
+ supported_precision_intersection.begin(),
+ supported_precision_intersection.end(),
+ [&prc](const std::vector& precisions) { return std::find(precisions.begin(), precisions.end(), prc) != precisions.end(); })) {
+ exec_prc = InferenceEngine::details::convertPrecision(prc);
 break; } }
@@ -482,8 +507,8 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener const std::vector& ops_list_; const dnnl::post_ops& post_ops_;
- std::set get_supported_precisions(Algorithm algo) {
- std::set precisions;
+ std::set> get_supported_precisions(Algorithm algo) {
+ std::set> precisions;
 OV_SWITCH(intel_cpu, SupportedPrecisions, precisions, algo, OV_CASE(Algorithm::EltwiseRelu, jit_dnnl_aux_emitter),
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index d11fc50d33edfe..8eb425e7ec4921 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -25,6 +25,7 @@ #include "utils/cpu_utils.hpp" #include "snippets_transformations/fuse_load_store_and_convert.hpp" #include "snippets_transformations/mul_add_to_fma.hpp"
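// Illustrative sketch, not part of the patch: generate() above intersects the
// precision tuples supported by every fused emitter and then walks
// exec_precisions_priority, taking the first priority precision that occurs in
// any surviving tuple. A stand-alone model of that selection (hypothetical names;
// the real code keeps Precision::UNSPECIFIED when nothing matches):
#include <algorithm>
#include <set>
#include <vector>

namespace exec_prc_sketch {
enum class prec { u8, i8, u16, i16, bf16, i32, f32, unspecified };

inline prec select_exec_precision(const std::set<std::vector<prec>>& intersection) {
    static const prec priority[] = {prec::u8, prec::i8, prec::u16, prec::i16,
                                    prec::bf16, prec::i32, prec::f32};
    for (const auto p : priority) {
        const bool supported = std::any_of(
            intersection.begin(), intersection.end(),
            [&p](const std::vector<prec>& tuple) {
                return std::find(tuple.begin(), tuple.end(), p) != tuple.end();
            });
        if (supported)
            return p;  // the lowest-bitness supported precision wins
    }
    return prec::unspecified;
}
}  // namespace exec_prc_sketch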
+#include "snippets_transformations/remove_converts.hpp" #include "ngraph_transformations/convert_to_swish_cpu.hpp" using namespace InferenceEngine; @@ -39,7 +40,7 @@ namespace node { namespace { /* This class implementation is a temporal WA - TODO: revise the implementation to remove the node reference*/ + TODO: revise the implementation to remove the node reference*/ class SnippetShapeInfer : public ShapeInferEmptyPads { public: SnippetShapeInfer(Snippet* node) : m_node(node) {} @@ -531,28 +532,36 @@ bool Snippet::created() const { } void Snippet::generate(const jit_snippets_compile_args* jcp) { - ov::pass::Manager optManager; - optManager.register_pass(); - optManager.register_pass(); - optManager.register_pass(); - optManager.register_pass(); + ov::pass::Manager pre_dialect; + pre_dialect.register_pass(); + ov::pass::Manager post_dialect; + + ov::pass::Manager post_precision; + post_precision.register_pass(); + post_precision.register_pass(); + post_precision.register_pass(); // LoadConvert uses Load emitter that support conversion from any type to only f32 - optManager.get_pass_config()->set_callback( + post_precision.get_pass_config()->set_callback( [](const std::shared_ptr& n) -> bool { if (const auto& convert = std::dynamic_pointer_cast(n)) return convert->get_destination_type() != ov::element::f32; return true; }); - // StoreConvert uses Store emitter that support conversion from only f32 to any types - optManager.get_pass_config()->set_callback( + post_precision.get_pass_config()->set_callback( [](const std::shared_ptr& n) -> bool { if (const auto& convert = std::dynamic_pointer_cast(n)) return convert->get_input_element_type(0) != ov::element::f32; return true; }); - schedule = snippet->generate(optManager, reinterpret_cast(jcp)); + post_precision.register_pass(); + + schedule = snippet->generate( + pre_dialect, + post_dialect, + post_precision, + reinterpret_cast(jcp)); } void Snippet::update_ptrs(jit_snippets_call_args& call_args) { diff --git a/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp new file mode 100644 index 00000000000000..238fadaa47e897 --- /dev/null +++ b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "remove_converts.hpp" + +#include "snippets/itt.hpp" +#include "ngraph/opsets/opset1.hpp" +#include "ngraph/rt_info.hpp" +#include "ngraph/pattern/op/wrap_type.hpp" + +#include "snippets/op/convert_saturation.hpp" + +ov::intel_cpu::pass::RemoveConverts::RemoveConverts() { + MATCHER_SCOPE(RemoveConverts); + auto parent_convert_wrap = ngraph::pattern::wrap_type(); + auto child_convert_wrap = ngraph::pattern::wrap_type({ parent_convert_wrap }); + + auto callback = [=](ngraph::pattern::Matcher& m) { + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::RemoveConverts") + const auto& pm = m.get_pattern_value_map(); + const auto parent_convert = pm.at(parent_convert_wrap).get_node_shared_ptr(); + const auto child_convert = pm.at(child_convert_wrap).get_node_shared_ptr(); + if ( + (parent_convert->get_input_element_type(0) != element::f32) || + (parent_convert->get_output_target_inputs(0).size() != 1ull) || + (parent_convert->get_output_element_type(0) != element::bf16) || + (child_convert->get_output_element_type(0) != element::f32)) { + return false; + } + + 
replace_output_update_name(child_convert->output(0), parent_convert->get_input_source_output(0));
+ return true;
+ };
+
+ auto m = std::make_shared(child_convert_wrap, matcher_name);
+ register_matcher(m, callback);
+}
diff --git a/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp new file mode 100644 index 00000000000000..b1fc6d4503d606
--- /dev/null
+++ b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/pass/graph_rewrite.hpp"
+#include "ngraph/pattern/matcher.hpp"
+
+namespace ov {
+namespace intel_cpu {
+namespace pass {
+
+/**
+ * @interface RemoveConverts
+ * @brief Removes a sequence of two ConvertSaturation operations for the specific precisions: FP32 => BF16 => FP32
+ * @ingroup snippets
+ */
+class RemoveConverts : public ngraph::pass::MatcherPass {
+public:
+ OPENVINO_RTTI("RemoveConverts", "0");
+ RemoveConverts();
+};
+
+} // namespace pass
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp new file mode 100644 index 00000000000000..9469bc9607141a
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp
@@ -0,0 +1,81 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "snippets/check_broadcast.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+namespace ov {
+namespace test {
+namespace snippets {
+
+
+namespace {
+
+const std::vector input_types = {
+ // TODO: 105804
+ //ov::element::i32,
+ ov::element::f32
+};
+
+const std::vector test_cases = {
+ // broadcast is necessary
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, -1),
+ 1,
+ 0
+ },
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, 2),
+ 1,
+ 0
+ },
+
+ // broadcast is not necessary
+ {
+ {{1, 3, 4, 4}, {1, 3, 4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, -1),
+ 1,
+ 1
+ },
+ {
+ {{1, 3, 4, 4}, {1, 3, 4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, 0),
+ 1,
+ 1
+ },
+
+ // any other broadcast type (not PDPD)
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY, -1),
+ 1,
+ 1
+ },
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY, 0),
+ 1,
+ 1
+ },
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY, 2),
+ 1,
+ 1
+ },
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_CheckBroadcast, CheckBroadcast,
+ ::testing::Combine(
+ ::testing::ValuesIn(input_types),
+ ::testing::ValuesIn(test_cases),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ CheckBroadcast::getTestCaseName);
+
+} // namespace
+} // namespace snippets
+} // namespace test
+} // namespace ov
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp new file mode 100644 index 00000000000000..5c93badbd3c9e9
--- /dev/null
+++
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/precision_propagation_convertion.hpp" +#include +#include + +namespace ov { +namespace test { +namespace snippets { + + +namespace { + +const std::vector> input_shapes = { + {{ 1, 3, 16, 16 }, { 1, 1, 1, 16 }}, +}; + +const std::vector> fake_quantize_intervals = { + {0.f, 2.55f, 0.f, 2.55f}, + {-1.28f, 1.27f, -1.28f, 1.27f} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_PrecisionPropagation_Convertion, PrecisionPropagationConvertion, + ::testing::Combine( + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(fake_quantize_intervals), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + PrecisionPropagationConvertion::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp b/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp index 0fcaaceadd70ab..5431cbb2626a55 100644 --- a/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp +++ b/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp @@ -155,7 +155,7 @@ class MulAddToFMATests : public LoweringTests, public testing::WithParamInterfac }; TEST_P(MulAddToFMATests, MulAddToFMATests) { - auto subgraph = getLoweredSubgraph(snippets_function->getOriginal(), master_shape, cpu_manager, generator); + auto subgraph = getLoweredSubgraph(snippets_function->getOriginal(), master_shape, {}, {}, cpu_manager, generator); model = subgraph->body_ptr(); model_ref = snippets_function->getLowered(); } diff --git a/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp new file mode 100644 index 00000000000000..1c33792cd328ec --- /dev/null +++ b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/snippets_test_utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class CheckBroadcastTestCaseParams { +public: + std::pair input_shapes; + ov::op::AutoBroadcastSpec broadcast; + size_t num_nodes; + size_t num_subgraphs; +}; + +typedef std::tuple < + ov::element::Type, // input types + CheckBroadcastTestCaseParams, // test case details + std::string // target device +> CheckBroadcastParams; + +class CheckBroadcast : public testing::WithParamInterface, + virtual public ov::test::SnippetsTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp b/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp new file mode 100644 index 00000000000000..3ab24d7cf299f3 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/snippets_test_utils.hpp" 
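// Illustrative sketch, not part of the patch: the CheckBroadcast cases above rely
// on PDPD broadcast semantics, where the second input is aligned to the first
// starting at `axis` (axis == -1 means rank(a) - rank(b)) and the output keeps the
// first input's shape. A stand-alone model of that rule (the helper is hypothetical):
#include <cassert>
#include <cstdint>
#include <vector>

namespace pdpd_sketch {
inline std::vector<int64_t> pdpd_broadcast_shape(const std::vector<int64_t>& a,
                                                 const std::vector<int64_t>& b,
                                                 int64_t axis) {
    if (axis == -1)
        axis = static_cast<int64_t>(a.size()) - static_cast<int64_t>(b.size());
    assert(axis >= 0 && axis + static_cast<int64_t>(b.size()) <= static_cast<int64_t>(a.size()));
    for (size_t i = 0; i < b.size(); ++i) {
        // each dimension of b must match the aligned dimension of a or be 1
        assert(b[i] == a[static_cast<size_t>(axis) + i] || b[i] == 1);
    }
    return a;  // the PDPD output shape is the first input's shape
}
}  // namespace pdpd_sketch
// Example: pdpd_broadcast_shape({1, 3, 4, 4}, {4, 4}, 2) and axis -1 both yield
// {1, 3, 4, 4}, matching the "broadcast is necessary" cases above, where the
// lower-rank input has to be expanded before the element-wise operation.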
+ +namespace ov { +namespace test { +namespace snippets { + +typedef std::tuple< + std::vector, // Input shapes + std::vector, // FakeQuantize intervals + size_t, // Expected num nodes + size_t, // Expected num subgraphs + std::string // Target Device +> PrecisionPropagationParams; + +class PrecisionPropagationConvertion : + public testing::WithParamInterface, + virtual public ov::test::SnippetsTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp index 0dc3d899f7988a..8c4109c439365d 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp @@ -20,7 +20,7 @@ std::string FuseFakeQuantizeTransformation::getTestCaseName(const testing::TestP std::tie(targetDevice, testValues) = obj.param; std::ostringstream result; - result << targetDevice << "_" << + result << "targetDevice=" << targetDevice << "_" << testValues.actual.precisionBeforeAdd << "_" << testValues.actual.add.values.size() << "_" << testValues.actual.add.outPrecision << "_" << diff --git a/src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp b/src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp new file mode 100644 index 00000000000000..3730771a1a44d5 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp @@ -0,0 +1,89 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/check_broadcast.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "subgraph_converts.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class CheckBroadcastFunction { +public: + static std::shared_ptr get( + const PartialShape& input_shape1, + const PartialShape& input_shape2, + const ov::element::Type input_type, + const ov::op::AutoBroadcastSpec broadcast) { + const auto parameter1 = std::make_shared(input_type, input_shape1); + parameter1->set_friendly_name("parameter1"); + + const auto parameter2 = std::make_shared(input_type, input_shape2); + parameter2->set_friendly_name("parameter2"); + + std::shared_ptr parent = std::make_shared( + parameter1, + parameter2, + broadcast); + parent->set_friendly_name("multiply"); + + const auto result = std::make_shared(parent); + result->set_friendly_name("result"); + + return std::make_shared( + ngraph::ResultVector{ result }, + ngraph::ParameterVector{ parameter1, parameter2 }, + "CheckBroadcastFunction"); + } +}; + +std::string CheckBroadcast::getTestCaseName(testing::TestParamInfo obj) { + ov::element::Type input_type; + CheckBroadcastTestCaseParams test_case_params; + std::string target_device; + + std::tie(input_type, test_case_params, target_device) = obj.param; + + std::ostringstream result; + result << "IS=" << test_case_params.input_shapes.first.get_shape() << "_" << + test_case_params.input_shapes.second.get_shape() << "_"; + result << "IT=" << input_type << "_"; + result << "BCT=" << test_case_params.broadcast.m_type << "_"; + result << "BCA=" << test_case_params.broadcast.m_axis 
<< "_"; + result << "#N=" << test_case_params.num_nodes << "_"; + result << "#S=" << test_case_params.num_subgraphs << "_"; + result << "targetDevice=" << target_device; + return result.str(); +} + +void CheckBroadcast::SetUp() { + ov::element::Type input_type; + CheckBroadcastTestCaseParams test_case_params; + + std::tie(input_type, test_case_params, targetDevice) = this->GetParam(); + ref_num_nodes = test_case_params.num_nodes; + ref_num_subgraphs = test_case_params.num_subgraphs; + + init_input_shapes(static_partial_shapes_to_test_representation({ + test_case_params.input_shapes.first, + test_case_params.input_shapes.second})); + + function = CheckBroadcastFunction::get( + test_case_params.input_shapes.first, + test_case_params.input_shapes.second, + input_type, + test_case_params.broadcast); +} + +TEST_P(CheckBroadcast, CompareWithRefImpl) { + run(); + validateNumSubgraphs(); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/snippets/convert.cpp b/src/tests/functional/plugin/shared/src/snippets/convert.cpp index 60419d28b2f96f..95749f32da1272 100644 --- a/src/tests/functional/plugin/shared/src/snippets/convert.cpp +++ b/src/tests/functional/plugin/shared/src/snippets/convert.cpp @@ -106,8 +106,8 @@ parameters ConvertInput::generate_params_random() const { break; case ov::element::i32: case ov::element::i8: - startFrom = -10; - range = 20; + startFrom = -32; + range = 64; break; case ov::element::u8: startFrom = 10; diff --git a/src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp b/src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp new file mode 100644 index 00000000000000..570fa4b44dac70 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/precision_propagation_convertion.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "precision_propagation_convertion_function.hpp" + +namespace ov { +namespace test { +namespace snippets { + +std::string PrecisionPropagationConvertion::getTestCaseName(testing::TestParamInfo obj) { + std::vector input_shapes; + std::vector fake_quantize_intervals; + std::string targetDevice; + size_t num_nodes, num_subgraphs; + std::tie(input_shapes, fake_quantize_intervals, num_nodes, num_subgraphs, targetDevice) = obj.param; + + std::ostringstream result; + for (size_t i = 0; i < input_shapes.size(); ++i) + result << "IS[" << i << "]=" << input_shapes[i] << "_"; + for (size_t i = 0; i < fake_quantize_intervals.size(); ++i) + result << "FQ[" << i << "]=" << fake_quantize_intervals[i] << "_"; + result << "#N=" << num_nodes << "_"; + result << "#S=" << num_subgraphs << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void PrecisionPropagationConvertion::SetUp() { + std::vector input_shapes; + std::vector fake_quantize_intervals; + std::tie(input_shapes, fake_quantize_intervals, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); + init_input_shapes(static_partial_shapes_to_test_representation(input_shapes)); + + function = PrecisionPropagationConvertionFunction(input_shapes, ov::element::f32, fake_quantize_intervals).getOriginal(); +} + +TEST_P(PrecisionPropagationConvertion, CompareWithRefImpl) { + run(); + validateNumSubgraphs(); +} + +} // namespace snippets +} // namespace test +} // namespace ov 
diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp new file mode 100644 index 00000000000000..554d7b08fc5134 --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "openvino/core/model.hpp" +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +/** + * @class PrecisionPropagationConvertionFunction + * @brief A PrecisionPropagationConvertionFunction instance returns the reference and original functions. + * + * Input arguments are used to create the function in the getOriginal method only. + * Don't use the getReference and getLowered methods: they are not implemented and throw std::runtime_error. + * Note that the ov::element::Type_t precision base type input argument is not used. + */ +class PrecisionPropagationConvertionFunction : public SnippetsFunctionBase { +public: + PrecisionPropagationConvertionFunction( + const std::vector<ov::PartialShape>& input_shapes, + const element::Type input_type, + const std::vector<float>& fake_quantize_intervals); + + /* + * Don't call this method explicitly. Create an instance of PrecisionPropagationConvertionFunction instead; + * the method is then called implicitly in getOriginal. + * Note that the getReference and getLowered methods are not implemented and throw an exception. + */ + static std::shared_ptr<ov::Model> get( + const std::vector<ov::PartialShape>& input_shapes, + const element::Type input_type, + const std::vector<float>& fake_quantize_intervals); + +protected: + std::shared_ptr<ov::Model> initOriginal() const override; + +private: + const std::vector<float> fake_quantize_intervals; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp new file mode 100644 index 00000000000000..b32099cf3020de --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp @@ -0,0 +1,131 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include "ngraph/opsets/opset1.hpp" +#include "snippets/op/convert_saturation.hpp" +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +/** + * @class DummyAdd + * @brief DummyAdd operation has a custom validate_and_infer_types method implementation.
+ */ +class DummyAdd : public ngraph::opset1::Add { +public: + OPENVINO_OP("DummyAdd", "test::snippets"); + + DummyAdd(const Output<Node>& arg0, + const Output<Node>& arg1, + const ngraph::op::AutoBroadcastSpec& auto_broadcast = + ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY)) + : ngraph::opset1::Add(arg0, arg1, auto_broadcast) { + constructor_validate_and_infer_types(); + } + + DummyAdd(const ngraph::opset1::Add& add) + : Add(add.get_input_source_output(0), add.get_input_source_output(1), add.get_autob()) { + constructor_validate_and_infer_types(); + } + + DummyAdd() = default; + + void validate_and_infer_types() override { + const auto input_type1 = get_input_element_type(0); + const auto input_type2 = get_input_element_type(1); + + const element::Type output_type = (input_type1 == element::i8) || (input_type2 == element::i8) ? + element::i32 : + get_input_element_type(0); + + set_output_type(0, output_type, get_input_partial_shape(0)); + } + + std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override { + return std::make_shared<DummyAdd>(new_args.at(0), new_args.at(1), this->get_autob()); + } +}; + +class PrecisionPropagationAddFunctionParams { +public: + class Actual { + public: + std::pair<element::Type, element::Type> convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair<element::Type, element::Type> convertion_before_op2_2; + }; + + class Expected { + public: + std::pair<element::Type, element::Type> convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair<element::Type, element::Type> convertion_before_op2_2; + element::Type convertion_after_op2; + }; +}; + +/** + * @class PrecisionPropagationAddFunction + * @brief A PrecisionPropagationAddFunction instance returns the reference and original functions. + * + * Input arguments are used to create the function in the getOriginal or getReference methods only. + * Don't use the getLowered method: it is not implemented and throws std::runtime_error. + * Note that the ov::element::Type_t precision base type input argument is not used. + */ +class PrecisionPropagationAddFunction : public SnippetsFunctionBase { +public: + explicit PrecisionPropagationAddFunction( + const std::vector<ov::PartialShape> input_shapes, + const ngraph::element::Type precision1, + const ngraph::element::Type precision2, + const ngraph::element::Type constant_precision, + PrecisionPropagationAddFunctionParams::Actual actual, + PrecisionPropagationAddFunctionParams::Expected expected) : + SnippetsFunctionBase(input_shapes), + precision1(precision1), + precision2(precision2), + constant_precision(constant_precision), + actual(actual), + expected(expected) { + OPENVINO_ASSERT(input_shapes.size() == 2ull, "input_shapes size has to be equal to 2"); + } + + /* + * Don't call this method explicitly. Create an instance of PrecisionPropagationAddFunction instead; + * the method is then called implicitly in the getOriginal or getReference methods. + * Note that the getLowered method is not implemented and throws an exception.
+ */ + static std::shared_ptr get( + const ngraph::element::Type precision1, + const ngraph::PartialShape& inputShape1, + const ngraph::element::Type precision2, + const ngraph::PartialShape& inputShape2, + const ngraph::element::Type constant_precision, + const std::pair& convertion_before_op1 = std::pair(), + const element::Type convertion_before_op2_1 = element::undefined, + const std::pair& convertion_before_op2_2 = std::pair(), + const element::Type convertion_after_op2 = {}); + +protected: + std::shared_ptr initOriginal() const override; + std::shared_ptr initReference() const override; + + const ngraph::element::Type precision1; + const ngraph::element::Type precision2; + const ngraph::element::Type constant_precision; + const PrecisionPropagationAddFunctionParams::Actual actual; + const PrecisionPropagationAddFunctionParams::Expected expected; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp index b4073b2d065ae0..9d3edad4b55339 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp @@ -17,6 +17,7 @@ using ov::Model; class SnippetsFunctionBase { public: SnippetsFunctionBase() = delete; + virtual ~SnippetsFunctionBase() = default; explicit SnippetsFunctionBase(const std::vector& inputShapes, ov::element::Type_t precision = element::f32) : precision{precision}, input_shapes{inputShapes} {} diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp new file mode 100644 index 00000000000000..20f517b16dfceb --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp @@ -0,0 +1,92 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "precision_propagation_convertion_function.hpp" +#include +#include + +namespace ov { +namespace test { +namespace snippets { + +namespace { +std::shared_ptr make_fake_quantize( + const Output& parent, + const ngraph::PartialShape& inputShape, + const element::Type inputType, + const std::vector& fake_quantize_intervals) { + auto generate = [](const ov::element::Type precision, + const ngraph::Shape& shape, + const float initialValue, + const std::string& name) { + const auto size = ngraph::shape_size(shape); + std::vector values(size); + for (auto i = 0; i < size; ++i) { + values[i] = static_cast(initialValue + i); + } + auto constant = std::make_shared(precision, shape, values); + constant->set_friendly_name(name); + return constant; + }; + + const auto fakeQuantize = std::make_shared( + parent, + generate(inputType, {}, fake_quantize_intervals[0], "inputLow"), + generate(inputType, {}, fake_quantize_intervals[1], "inputHigh"), + generate(inputType, {}, fake_quantize_intervals[2], "outputLow"), + generate(inputType, {}, fake_quantize_intervals[3], "outputHigh"), + 256ul); + fakeQuantize->set_friendly_name("fakeQuantize"); + + return fakeQuantize; +} +} // namespace + +PrecisionPropagationConvertionFunction::PrecisionPropagationConvertionFunction( + const std::vector& input_shapes, + const element::Type input_type, + const std::vector& fake_quantize_intervals) : + 
SnippetsFunctionBase(input_shapes, input_type), + fake_quantize_intervals(fake_quantize_intervals) { +} + +std::shared_ptr PrecisionPropagationConvertionFunction::get( + const std::vector& input_shapes, + const element::Type input_type, + const std::vector& fake_quantize_intervals) { + assert(2ull == input_shapes.size()); + assert(4ull == fake_quantize_intervals.size()); + const auto parameter1 = std::make_shared(input_type, input_shapes[0]); + parameter1->set_friendly_name("parameter1"); + + const auto parameter2 = std::make_shared(input_type, input_shapes[1]); + parameter2->set_friendly_name("parameter2"); + + std::shared_ptr parent = make_fake_quantize( + parameter1, + input_shapes[0], + input_type, + fake_quantize_intervals); + parent->set_friendly_name("fakeQuantize"); + + parent = std::make_shared(parent, parameter2); + parent->set_friendly_name("add"); + + const auto result = std::make_shared(parent); + result->set_friendly_name("result"); + + auto function = std::make_shared( + ngraph::ResultVector{ result }, + ParameterVector{ parameter1, parameter2 }, + "PrecisionPropagationConvertionFunction"); + return function; +} + +std::shared_ptr PrecisionPropagationConvertionFunction::initOriginal() const { + return get(input_shapes, precision, fake_quantize_intervals); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp new file mode 100644 index 00000000000000..6a9ef600409e84 --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "precision_propagation_function.hpp" +#include +#include + +namespace ov { +namespace test { +namespace snippets { + +std::shared_ptr PrecisionPropagationAddFunction::get( + const ngraph::element::Type precision1, + const ngraph::PartialShape& inputShape1, + const ngraph::element::Type precision2, + const ngraph::PartialShape& inputShape2, + const ngraph::element::Type constant_precision, + const std::pair& convertion_before_op1, + const element::Type convertion_before_op2_1, + const std::pair& convertion_before_op2_2, + const element::Type convertion_after_op2) { + const auto create_convert = [](std::shared_ptr parent, const element::Type convertion_type) -> std::shared_ptr { + return convertion_type == element::undefined + ? 
std::dynamic_pointer_cast(parent) + : std::make_shared(parent, convertion_type); + }; + + const auto make_branch = [&create_convert]( + const ngraph::element::Type precision, + const ngraph::PartialShape& inputShape, + const size_t index, + const element::Type convertion_type) -> std::pair, std::shared_ptr> { + const auto parameter = std::make_shared(precision, inputShape); + parameter->set_friendly_name("parameter" + std::to_string(index)); + + std::shared_ptr parent = create_convert(parameter, convertion_type); + + return { parameter, parent }; + }; + + const auto branch1 = make_branch(precision1, inputShape1, 1, convertion_before_op1.first); + const auto branch2 = make_branch(precision2, inputShape2, 2, convertion_before_op1.second); + + std::shared_ptr parent = std::make_shared(branch1.second, branch2.second); + parent->set_friendly_name("add"); + + parent = create_convert(parent, convertion_before_op2_1); + + const auto maximum_in2_type = convertion_before_op2_2.second == element::undefined ? + constant_precision : + convertion_before_op2_2.second; + if ((convertion_before_op2_2.first == element::undefined) && + (parent->get_output_element_type(0) != maximum_in2_type)) { + parent = std::make_shared(parent, maximum_in2_type); + } + + parent = std::make_shared( + create_convert(parent, convertion_before_op2_2.first), + create_convert( + std::make_shared(constant_precision, Shape{}, std::vector{0.f}), + convertion_before_op2_2.second)); + parent->set_friendly_name("maximum"); + + parent = create_convert(parent, convertion_after_op2); + + const auto result = std::make_shared(parent); + auto& result_out_tensor = result->get_output_tensor(0); + result_out_tensor.set_names({ "result_tensor" }); + result->set_friendly_name("result"); + + const ngraph::ResultVector results{ result }; + const ngraph::ParameterVector parameters{ branch1.first, branch2.first }; + const auto model = std::make_shared(results, parameters, "SnippetsPrecisionPropagation"); + return model; +} + +std::shared_ptr PrecisionPropagationAddFunction::initOriginal() const { + return get( + precision1, + input_shapes[0], + precision2, + input_shapes[1], + constant_precision, + actual.convertion_before_op1, + actual.convertion_before_op2_1, + actual.convertion_before_op2_2); +} + +std::shared_ptr PrecisionPropagationAddFunction::initReference() const { + return get( + precision1, + input_shapes[0], + precision2, + input_shapes[1], + constant_precision, + expected.convertion_before_op1, + expected.convertion_before_op2_1, + expected.convertion_before_op2_2, + expected.convertion_after_op2); +} + +} // namespace snippets +} // namespace test +} // namespace ov From 982e1c1192855192c8c262d479fe9c9ac94435d5 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Thu, 23 Mar 2023 10:29:32 +0100 Subject: [PATCH 053/296] [PyOV] Fix issues with RTMap (#15636) * [PyOV] Fix issues with RTMap * update year * some clean-up and items fix * tests and small fixes * Update src/bindings/python/src/pyopenvino/utils/utils.cpp * undo changes * fix serialization on python side * rt_info as rt_map * undo several changes in tests * fix mo test * sadd docstrings * add tests * fix codestyle * try to fix win * fix master * apply comments --- .../pyopenvino/frontend/frontend_module.cmake | 2 +- .../python/src/pyopenvino/graph/any.cpp | 77 ++++++++++++++++++- .../python/src/pyopenvino/graph/model.cpp | 26 +++---- .../python/src/pyopenvino/graph/rt_map.cpp | 58 ++++++++++++-- .../python/src/pyopenvino/utils/utils.cpp | 59 +++++++++++++- 
.../python/src/pyopenvino/utils/utils.hpp | 8 ++ .../python/tests/test_graph/test_any.py | 25 +++++- .../python/tests/test_runtime/test_model.py | 51 ++++++------ .../tests/test_utils/test_data_dispatch.py | 2 +- src/core/src/model.cpp | 2 +- .../unit_tests/mo/convert/meta_data_test.py | 6 +- .../mo/convert/meta_data_test_actual.py | 6 +- 12 files changed, 256 insertions(+), 66 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake b/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake index 33aaa10a6b3b55..d056bbfc1e93b0 100644 --- a/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake +++ b/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake @@ -25,7 +25,7 @@ function(frontend_module TARGET FRAMEWORK INSTALL_COMPONENT) target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenVINOPython_SOURCE_DIR}/src/pyopenvino/utils/") - target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime openvino::frontend::${FRAMEWORK}) + target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime openvino::runtime::dev openvino::frontend::${FRAMEWORK}) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/src/bindings/python/src/pyopenvino/graph/any.cpp b/src/bindings/python/src/pyopenvino/graph/any.cpp index 19c9e3c52cb111..a53fffac102119 100644 --- a/src/bindings/python/src/pyopenvino/graph/any.cpp +++ b/src/bindings/python/src/pyopenvino/graph/any.cpp @@ -5,12 +5,19 @@ #include "openvino/core/any.hpp" #include +#include #include "pyopenvino/graph/any.hpp" #include "pyopenvino/utils/utils.hpp" namespace py = pybind11; +namespace { +bool check_key(py::object key, py::object obj) { + return key.is(py::type::of(obj)); +} +}; // namespace + void regclass_graph_Any(py::module m) { py::class_<ov::Any, std::shared_ptr<ov::Any>> ov_any(m, "OVAny"); @@ -23,9 +30,7 @@ void regclass_graph_Any(py::module m) { })); ov_any.def("__repr__", [](const ov::Any& self) { - std::stringstream ret; - self.print(ret); - return ret.str(); + return ""; }); ov_any.def("__hash__", [](ov::Any& self) { @@ -62,6 +67,72 @@ void regclass_graph_Any(py::module m) { ov_any.def("__eq__", [](const ov::Any& a, py::object& b) -> bool { return a == ov::Any(Common::utils::py_object_to_any(b)); }); + ov_any.def( + "astype", + [](ov::Any& self, py::object dtype) { + if (check_key(dtype, py::bool_())) { + return py::cast(self.as<bool>()); + } else if (check_key(dtype, py::str())) { + return py::cast(self.as<std::string>()); + } else if (check_key(dtype, py::int_())) { + return py::cast(self.as<int64_t>()); + } else if (check_key(dtype, py::float_())) { + return py::cast(self.as<double>()); + } else if (check_key(dtype, py::dict())) { + return Common::utils::from_ov_any_map_no_leaves(self); + } + std::stringstream str; + str << "Unsupported data type : '" << dtype << "' is passed as an argument."; + OPENVINO_THROW(str.str()); + }, + R"( + Returns the runtime attribute cast to the defined data type. + + :param dtype: Data type to which the runtime attribute will be cast. + :type dtype: Union[bool, int, str, float, dict] + + :return: A runtime attribute.
+ :rtype: Any + )"); + ov_any.def( + "aslist", + [](ov::Any& self, py::object dtype) { + // before serialization + if (self.is<Common::utils::EmptyList>() || dtype.is_none()) { + return py::cast(py::list()); + } else if (self.is<std::vector<std::string>>()) { + return py::cast(self.as<std::vector<std::string>>()); + } else if (self.is<std::vector<int64_t>>()) { + return py::cast(self.as<std::vector<int64_t>>()); + } else if (self.is<std::vector<double>>()) { + return py::cast(self.as<std::vector<double>>()); + } else if (self.is<std::vector<bool>>()) { + return py::cast(self.as<std::vector<bool>>()); + } + // after serialization + if (check_key(dtype, py::str())) { + return py::cast(self.as<std::vector<std::string>>()); + } else if (check_key(dtype, py::int_())) { + return py::cast(self.as<std::vector<int64_t>>()); + } else if (check_key(dtype, py::float_())) { + return py::cast(self.as<std::vector<double>>()); + } else if (check_key(dtype, py::bool_())) { + return py::cast(self.as<std::vector<bool>>()); + } + std::stringstream str; + str << "Unsupported data type : '" << dtype << "' is passed as an argument."; + OPENVINO_THROW(str.str()); + }, + py::arg("dtype") = py::none(), + R"( + Returns the runtime attribute as a list with the specified data type. + + :param dtype: Data type of the list to which the runtime attribute will be cast. + :type dtype: Union[bool, int, str, float] + + :return: A runtime attribute as a list. + :rtype: Union[List[float], List[int], List[str], List[bool]] + )"); ov_any.def( "get", [](const ov::Any& self) -> py::object { diff --git a/src/bindings/python/src/pyopenvino/graph/model.cpp b/src/bindings/python/src/pyopenvino/graph/model.cpp index f95801bdb11258..acaa8792f0538a 100644 --- a/src/bindings/python/src/pyopenvino/graph/model.cpp +++ b/src/bindings/python/src/pyopenvino/graph/model.cpp @@ -774,47 +774,41 @@ void regclass_graph_Model(py::module m) { for (size_t i = 0; i < path.size(); i++) { cpp_args[i] = path[i].cast<std::string>(); } - return Common::utils::from_ov_any(self.get_rt_info(cpp_args)); + return py::cast(self.get_rt_info(cpp_args)); }, py::arg("path"), R"( - Returns runtime attribute. + Returns runtime attribute as an OVAny object. :param path: List of strings which defines a path to runtime info. :type path: List[str] :return: A runtime attribute. - :rtype: Any + :rtype: openvino.runtime.OVAny )"); model.def( "get_rt_info", [](const ov::Model& self, const py::str& path) -> py::object { - return Common::utils::from_ov_any(self.get_rt_info(path.cast<std::string>())); + return py::cast(self.get_rt_info(path.cast<std::string>())); }, py::arg("path"), R"( - Returns runtime attribute. + Returns runtime attribute as an OVAny object. :param path: A string which defines a path to runtime info. :type path: str :return: A runtime attribute.
- :rtype: Any + :rtype: openvino.runtime.OVAny )"); model.def( "has_rt_info", [](const ov::Model& self, const py::list& path) -> bool { - // FIXME: understand why has_rt_info causes Python crash - try { - std::vector<std::string> cpp_args(path.size()); - for (size_t i = 0; i < path.size(); i++) { - cpp_args[i] = path[i].cast<std::string>(); - } - self.get_rt_info(cpp_args); - return true; - } catch (ov::Exception&) { - return false; + std::vector<std::string> cpp_args(path.size()); + for (size_t i = 0; i < path.size(); i++) { + cpp_args[i] = path[i].cast<std::string>(); } + return self.has_rt_info(cpp_args); }, py::arg("path"), R"( diff --git a/src/bindings/python/src/pyopenvino/graph/rt_map.cpp b/src/bindings/python/src/pyopenvino/graph/rt_map.cpp index 5985c87f06136f..e666b3972e605c 100644 --- a/src/bindings/python/src/pyopenvino/graph/rt_map.cpp +++ b/src/bindings/python/src/pyopenvino/graph/rt_map.cpp @@ -10,6 +10,7 @@ #include #include "dict_attribute_visitor.hpp" +#include "meta_data.hpp" #include "openvino/core/node.hpp" #include "openvino/core/runtime_attribute.hpp" #include "openvino/op/add.hpp" @@ -27,11 +28,48 @@ using PyRTMap = ov::RTMap; PYBIND11_MAKE_OPAQUE(PyRTMap); +// A custom iterator that returns Python objects rather than OVAny itself. +class PyRTMapIterator { +public: + PyRTMapIterator(const PyRTMap& py_rt_map, py::object ref, bool is_value) + : py_rt_map(py_rt_map), + is_value(is_value), + ref(ref), + it(py_rt_map.cbegin()) {} + + py::object next() { + if (it == py_rt_map.end()) { + throw py::stop_iteration(); + } + const auto result = *it; + it++; + if (is_value) { + return Common::utils::from_ov_any_no_leaves(result.second); + } else { + std::pair<std::string, py::object> res = {result.first, + Common::utils::from_ov_any_no_leaves(result.second)}; + return py::cast(res); + } + } + + const PyRTMap& py_rt_map; + bool is_value = false; + py::object ref; // keep a reference + std::map<std::string, ov::Any>::const_iterator it; +}; + void regclass_graph_PyRTMap(py::module m) { auto py_map = py::class_<PyRTMap>(m, "RTMap"); py_map.doc() = "openvino.runtime.RTMap makes bindings for std::map<std::string, ov::Any>, which can later be used as ov::Node::RTMap"; + py::class_<PyRTMapIterator>(m, "Iterator") + .def("__iter__", + [](PyRTMapIterator& it) -> PyRTMapIterator& { + return it; + }) + .def("__next__", &PyRTMapIterator::next); + py_map.def("__setitem__", [](PyRTMap& m, const std::string& k, const std::string v) { m[k] = v; }); @@ -39,7 +77,7 @@ m[k] = v; }); py_map.def("__getitem__", [](PyRTMap& m, const std::string& k) -> py::object { - return Common::utils::from_ov_any(m[k]); + return Common::utils::from_ov_any_no_leaves(m[k]); }); py_map.def( "__bool__", @@ -50,20 +88,28 @@ py_map.def( "__iter__", - [](PyRTMap& m) { - return py::make_key_iterator(m.begin(), m.end()); + [](PyRTMap& rt_map) { + return py::make_key_iterator(rt_map.begin(), rt_map.end()); }, py::keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ ); py_map.def( - "items", - [](PyRTMap& m) { - return py::make_iterator(m.begin(), m.end()); + "keys", + [](PyRTMap& rt_map) { + return py::make_key_iterator(rt_map.begin(), rt_map.end()); }, py::keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ ); + py_map.def("items", [](py::object rt_map) { + return PyRTMapIterator(rt_map.cast<PyRTMap&>(), rt_map, false); + }); + + py_map.def("values", [](py::object rt_map) { + return PyRTMapIterator(rt_map.cast<PyRTMap&>(), rt_map, true); + }); + py_map.def("__contains__", [](PyRTMap& m, const std::string& k) -> bool { auto it = m.find(k); if (it == m.end()) diff
--git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index f029323e35ab09..12f08410a67271 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -12,6 +12,7 @@ #include #include "Python.h" +#include "meta_data.hpp" #include "openvino/core/except.hpp" #include "openvino/frontend/decoder.hpp" @@ -20,12 +21,57 @@ using Version = ov::pass::Serialize::Version; namespace Common { namespace utils { +// For a complex structure, if an element isn't a map, just cast it to OVAny. +py::object from_ov_any_no_leaves(const ov::Any& any) { + if (any.is<std::shared_ptr<ov::Meta>>() || any.is<ov::AnyMap>()) { + return Common::utils::from_ov_any_map_no_leaves(any); + } else { + return py::cast(any); + } +} + +// Recursively go through dict to unwrap nested dicts and keep leaves as OVAny. +py::object from_ov_any_map_no_leaves(const ov::Any& any) { + const auto traverse_map = [](const ov::AnyMap& map) { + const auto unwrap_only_maps = [](const ov::Any& any) { + if (any.is<std::shared_ptr<ov::Meta>>()) { + const ov::AnyMap& as_map = *any.as<std::shared_ptr<ov::Meta>>(); + return from_ov_any_map_no_leaves(as_map); + } else if (any.is<ov::AnyMap>()) { + return from_ov_any_map_no_leaves(any.as<ov::AnyMap>()); + } + return py::cast(any); + }; + + std::map<std::string, py::object> result; + for (const auto& entry : map) { + result[entry.first] = unwrap_only_maps(entry.second); + } + return py::cast(result); + }; + + if (any.is<std::shared_ptr<ov::Meta>>()) { + const ov::AnyMap& as_map = *any.as<std::shared_ptr<ov::Meta>>(); + return traverse_map(as_map); + } else if (any.is<ov::AnyMap>()) { + return traverse_map(any.as<ov::AnyMap>()); + } + OPENVINO_THROW("Only ov::AnyMap or ov::Meta are expected here."); +} + +py::object from_ov_any_map(const ov::AnyMap& map) { + std::map<std::string, py::object> result; + for (const auto& entry : map) { + result[entry.first] = from_ov_any(entry.second); + } + return py::cast(result); +} + py::object from_ov_any(const ov::Any& any) { // Check for py::object if (any.is<py::object>()) { return any.as<py::object>(); - } - // Check for std::string + } // Check for std::string else if (any.is<std::string>()) { return py::cast(any.as<std::string>().c_str()); } @@ -98,6 +144,9 @@ py::object from_ov_any(const ov::Any& any) { // Check for std::map else if (any.is>()) { return py::cast(any.as>()); + } // Check for ov::AnyMap (std::map<std::string, ov::Any>) + else if (any.is<ov::AnyMap>()) { + return from_ov_any_map(any.as<ov::AnyMap>()); } // Check for std::map { else if (any.is>()) { @@ -113,6 +162,9 @@ py::object from_ov_any(const ov::Any& any) { PyDict_SetItemString(dict, property_name.c_str(), PyUnicode_FromString(mutability.c_str())); } return py::cast(dict); + } else if (any.is<std::shared_ptr<ov::Meta>>()) { + const ov::AnyMap& as_map = *any.as<std::shared_ptr<ov::Meta>>(); + return from_ov_any_map(as_map); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -258,9 +310,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { } } - // In case of empty vector works like with vector of strings if (_list.empty()) - return _list.cast<std::vector<std::string>>(); + return ov::Any(EmptyList()); switch (detected_type) { case PY_TYPE::STR: diff --git a/src/bindings/python/src/pyopenvino/utils/utils.hpp b/src/bindings/python/src/pyopenvino/utils/utils.hpp index 5b0d00165b968a..328f06820033f8 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.hpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.hpp @@ -15,6 +15,14 @@ namespace py = pybind11; namespace Common { namespace utils { + struct EmptyList {}; + + py::object from_ov_any_no_leaves(const ov::Any& any); + + py::object from_ov_any_map_no_leaves(const ov::Any& almost_map); + + py::object from_ov_any_map(const ov::AnyMap& map); + py::object from_ov_any(const ov::Any& any); std::map
properties_to_any_map(const std::map& properties); diff --git a/src/bindings/python/tests/test_graph/test_any.py b/src/bindings/python/tests/test_graph/test_any.py index e4817e52f7e296..4a8643a7586189 100644 --- a/src/bindings/python/tests/test_graph/test_any.py +++ b/src/bindings/python/tests/test_graph/test_any.py @@ -34,7 +34,7 @@ def test_any_list(values, data_type): @pytest.mark.parametrize(("value_dict", "value_type", "data_type"), [ - ({"key": "value"}, OVAny, str), + ({"key": "value"}, str, str), ({21: 37}, int, int), ({21.0: 37.0}, float, float), ]) @@ -65,3 +65,26 @@ def __init__(self): value = OVAny(TestClass()) assert isinstance(value.value, TestClass) assert value.value.text == "test" + + +@pytest.mark.parametrize(("value", "dtype"), [ + ("some_value", str), + (31.23456, float), + (True, bool), + (42, int), +]) +def test_astype(value, dtype): + ovany = OVAny(value) + assert ovany.astype(dtype) == value + + +@pytest.mark.parametrize(("value", "dtype"), [ + (["some_value", "another value"], str), + ([31.23456, -31.3453], float), + ([True, False], bool), + ([42, 21], int), + ([], None), +]) +def test_aslist(value, dtype): + ovany = OVAny(value) + assert ovany.aslist(dtype) == value diff --git a/src/bindings/python/tests/test_runtime/test_model.py b/src/bindings/python/tests/test_runtime/test_model.py index 20f58201ee84e3..e0af9cbda469be 100644 --- a/src/bindings/python/tests/test_runtime/test_model.py +++ b/src/bindings/python/tests/test_runtime/test_model.py @@ -5,6 +5,7 @@ import os import numpy as np import pytest +import math import openvino.runtime.opset8 as ops from openvino.runtime import ( @@ -549,22 +550,6 @@ def check_rt_info(model): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_serialize_complex_rt_info(request, tmp_path): def check_rt_info(model, serialized): - if serialized: - threshold = "13.23" - min_val = "-3.24543" - max_val = "3.23422" - directed = "YES" - empty = "" - ids = "sasd fdfdfsdf" - mean = "22.3 33.11 44" - else: - threshold = 13.23 - min_val = -3.24543 - max_val = 3.234223 - directed = True - empty = [] - ids = ["sasd", "fdfdfsdf"] - mean = [22.3, 33.11, 44.0] assert model.has_rt_info(["config", "type_of_model"]) is True assert model.has_rt_info(["config", "converter_type"]) is True assert model.has_rt_info(["config", "model_parameters", "threshold"]) is True @@ -577,17 +562,29 @@ def check_rt_info(model, serialized): assert model.has_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]) is True assert model.has_rt_info(["config", "model_parameters", "mean_values"]) is True - assert model.get_rt_info(["config", "type_of_model"]) == "classification" - assert model.get_rt_info(["config", "converter_type"]) == "classification" - assert model.get_rt_info(["config", "model_parameters", "threshold"]) == threshold - assert model.get_rt_info(["config", "model_parameters", "min"]) == min_val - assert model.get_rt_info(["config", "model_parameters", "max"]) == max_val - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "type"]) == "tree" - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "directed"]) == directed - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "float_empty"]) == empty - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "nodes"]) == empty - assert model.get_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]) == ids - assert 
model.get_rt_info(["config", "model_parameters", "mean_values"]) == mean + assert model.get_rt_info(["config", "type_of_model"]).astype(str) == "classification" + assert model.get_rt_info(["config", "converter_type"]).astype(str) == "classification" + assert math.isclose(model.get_rt_info(["config", "model_parameters", "threshold"]).astype(float), 13.23, rel_tol=0.0001) + assert math.isclose(model.get_rt_info(["config", "model_parameters", "min"]).astype(float), -3.24543, rel_tol=0.0001) + assert math.isclose(model.get_rt_info(["config", "model_parameters", "max"]).astype(float), 3.234223, rel_tol=0.0001) + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "type"]).astype(str) == "tree" + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "directed"]).astype(bool) is True + + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "float_empty"]).aslist() == [] + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "nodes"]).aslist() == [] + assert model.get_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]).aslist(str) == ["sasd", "fdfdfsdf"] + assert model.get_rt_info(["config", "model_parameters", "mean_values"]).aslist(float) == [22.3, 33.11, 44.0] + + rt_info = model.get_rt_info() + assert isinstance(rt_info["config"], dict) + + for key, value in rt_info.items(): + if key == "config": + for config_value in value: + assert config_value in ["type_of_model", "converter_type", "model_parameters"] + + for rt_info_val in model.get_rt_info(["config", "model_parameters", "labels", "label_tree"]).astype(dict): + assert rt_info_val in ["float_empty", "nodes", "type", "directed"] core = Core() xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) diff --git a/src/bindings/python/tests/test_utils/test_data_dispatch.py b/src/bindings/python/tests/test_utils/test_data_dispatch.py index fad863f61a52e8..e2ce00f10e7482 100644 --- a/src/bindings/python/tests/test_utils/test_data_dispatch.py +++ b/src/bindings/python/tests/test_utils/test_data_dispatch.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2022 Intel Corporation +# Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import os diff --git a/src/core/src/model.cpp b/src/core/src/model.cpp index df540d81f5554b..142514be45384b 100644 --- a/src/core/src/model.cpp +++ b/src/core/src/model.cpp @@ -955,7 +955,7 @@ bool ov::Model::has_rt_info(const std::vector& args) const { return false; if (i == args.size() - 1) break; - const ov::Any& rt_attr = get_rt_arg(info, args[i]); + const ov::Any rt_attr = get_rt_arg(info, args[i]); info = get_map_from_attr(rt_attr); } return true; diff --git a/tools/mo/unit_tests/mo/convert/meta_data_test.py b/tools/mo/unit_tests/mo/convert/meta_data_test.py index c5d2d05cc111ce..b5e78a15b0f67d 100644 --- a/tools/mo/unit_tests/mo/convert/meta_data_test.py +++ b/tools/mo/unit_tests/mo/convert/meta_data_test.py @@ -76,16 +76,16 @@ def check_meta_data(ov_model): for key, value in ref_meta.items(): if key == 'conversion_parameters': for param_name, param_value in value.items(): - val = ov_model.get_rt_info([key, param_name]) + val = ov_model.get_rt_info([key, param_name]).astype(str) if param_name in ['extensions', 'caffe_parser_path', 'input_model', 'k', 'output_dir']: val = Path(val) assert val == param_value, \ "Runtime info attribute with name {} does not match. 
Expected: {}, " \ "got {}".format(param_name, param_value, val) continue - assert str(ov_model.get_rt_info(key)) == value, \ + assert ov_model.get_rt_info(key).astype(str) == value, \ "Runtime info attribute with name {} does not match. Expected: {}, " \ - "got {}".format(key, value, ov_model.get_rt_info(key)) + "got {}".format(key, value, ov_model.get_rt_info(key).astype(str)) with tempfile.TemporaryDirectory(dir=self.test_directory) as tmpdir: diff --git a/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py b/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py index 7839f56bd28610..643510a5a631f0 100644 --- a/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py +++ b/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py @@ -22,16 +22,16 @@ def check_meta_data(ov_model, ref_meta): for key, value in ref_meta.items(): if key == 'conversion_parameters': for param_name, param_value in value.items(): - val = ov_model.get_rt_info([key, param_name]) + val = ov_model.get_rt_info([key, param_name]).astype(str) if param_name in ['extensions', 'caffe_parser_path', 'input_model', 'k', 'output_dir']: val = Path(val) assert val == param_value, \ "Runtime info attribute with name {} does not match. Expected: {}, " \ "got {}".format(param_name, param_value, val) continue - assert str(ov_model.get_rt_info(key)) == value, \ + assert ov_model.get_rt_info(key).astype(str) == value, \ "Runtime info attribute with name {} does not match. Expected: {}, " \ - "got {}".format(key, value, ov_model.get_rt_info(key)) + "got {}".format(key, value, ov_model.get_rt_info(key).astype(str)) for key, value in ov_model.get_rt_info().items(): if key in ignore_attrs: From a3958d6ddfa375a612666bf6e10cb278e99bad3c Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 23 Mar 2023 13:52:03 +0400 Subject: [PATCH 054/296] Use evaluation context for the inference (#16492) --- src/plugins/template/backend/executable.hpp | 8 ++++ .../template/backend/int_executable.cpp | 46 +++++++++++-------- .../template/backend/int_executable.hpp | 3 ++ .../template/src/sync_infer_request.cpp | 2 +- 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/plugins/template/backend/executable.hpp b/src/plugins/template/backend/executable.hpp index 2375d6e0a096af..0794488c334c3b 100644 --- a/src/plugins/template/backend/executable.hpp +++ b/src/plugins/template/backend/executable.hpp @@ -24,6 +24,14 @@ class Executable { /// \returns true if iteration is successful, false otherwise virtual bool call(std::vector& outputs, const std::vector& inputs) = 0; + /// \param outputs vector of runtime::Tensor used as outputs + /// \param inputs vector of runtime::Tensor used as inputs + /// \param context Evaluation context + /// \returns true if iteration is successful, false otherwise + virtual bool call(std::vector& outputs, + const std::vector& inputs, + const ov::EvaluationContext& context) = 0; + /// \brief Executes a single iteration of a Function. 
/// \param outputs vector of runtime::Tensor used as outputs /// \param inputs vector of runtime::Tensor used as inputs diff --git a/src/plugins/template/backend/int_executable.cpp b/src/plugins/template/backend/int_executable.cpp index fc4057955fecfa..49253aec58f379 100644 --- a/src/plugins/template/backend/int_executable.cpp +++ b/src/plugins/template/backend/int_executable.cpp @@ -105,7 +105,31 @@ ov::runtime::interpreter::INTExecutable::INTExecutable(const std::shared_ptr& outputs, const std::vector& inputs) { - // map function params -> HostTensor + EvaluationContext eval_context; + ov::op::util::VariableContext variable_context; + eval_context.emplace("VariableContext", variable_context); + + // for each ordered op in the graph + for (const auto& op : m_nodes) { + if (auto var_extension = std::dynamic_pointer_cast(op)) { + auto variable = var_extension->get_variable(); + if (!variable_context.get_variable_value(variable)) { + auto h_tensor = ov::Tensor(op->get_input_element_type(0), op->get_input_shape(0)); + // h_tensor->write(h_tensor->get_data_ptr(), h_tensor->get_size_in_bytes()); + const auto tensor_input = make_tmp_host_tensor(h_tensor); + variable_context.set_variable_value(variable, + std::make_shared(tensor_input)); + } + } + } + + return call(outputs, inputs, eval_context); +} + +bool ov::runtime::interpreter::INTExecutable::call(std::vector& outputs, + const std::vector& inputs, + const ov::EvaluationContext& context) { + // map function params -> ov::Tensor std::unordered_map, ov::Tensor> tensor_map; size_t input_count = 0; for (const auto& param : get_parameters()) { @@ -116,17 +140,13 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector& outp } std::unordered_map, size_t> results_map; - // map function outputs -> HostTensor + // map function outputs -> ov::Tensor for (size_t output_count = 0; output_count < get_results().size(); ++output_count) { auto output = get_results()[output_count]->output(0).get_tensor_ptr(); if (!results_map.count(output)) results_map.emplace(output, output_count); } - EvaluationContext eval_context; - ov::op::util::VariableContext variable_context; - eval_context.emplace("VariableContext", variable_context); - // for each ordered op in the graph for (const auto& op : m_nodes) { if (std::dynamic_pointer_cast(op)) { @@ -165,19 +185,9 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector& outp op_outputs.push_back(host_tensor); } - if (auto var_extension = std::dynamic_pointer_cast(cloned_node)) { - auto variable = var_extension->get_variable(); - if (!variable_context.get_variable_value(variable)) { - auto h_tensor = ov::Tensor(cloned_node->get_input_element_type(0), cloned_node->get_input_shape(0)); - // h_tensor->write(h_tensor->get_data_ptr(), h_tensor->get_size_in_bytes()); - const auto tensor_input = make_tmp_host_tensor(h_tensor); - variable_context.set_variable_value(variable, - std::make_shared(tensor_input)); - } - } - // Call evaluate for cloned_node with static shapes - if (!cloned_node->evaluate(op_outputs, op_inputs, eval_context)) { + if (!cloned_node->evaluate(op_outputs, op_inputs, context)) { + // TODO: extend evaluate map for the context evaluate_node(cloned_node, op_outputs, op_inputs); } // Update tensors in tensor map diff --git a/src/plugins/template/backend/int_executable.hpp b/src/plugins/template/backend/int_executable.hpp index 1ca49ff4253a79..2610a82ee23e4e 100644 --- a/src/plugins/template/backend/int_executable.hpp +++ b/src/plugins/template/backend/int_executable.hpp @@ -29,6 +29,9 @@ class 
INTExecutable : public Executable { INTExecutable(const std::shared_ptr& model); bool call(std::vector& outputs, const std::vector& inputs) override; + bool call(std::vector& outputs, + const std::vector& inputs, + const ov::EvaluationContext& context) override; ov::Tensor create_input_tensor(size_t input_index) override; diff --git a/src/plugins/template/src/sync_infer_request.cpp b/src/plugins/template/src/sync_infer_request.cpp index f560e876dee47e..6fa96c02a23d68 100644 --- a/src/plugins/template/src/sync_infer_request.cpp +++ b/src/plugins/template/src/sync_infer_request.cpp @@ -207,7 +207,7 @@ void ov::template_plugin::InferRequest::infer_preprocess() { void ov::template_plugin::InferRequest::start_pipeline() { OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, m_profiling_task[StartPipeline]) auto start = Time::now(); - m_executable->call(m_backend_output_tensors, m_backend_input_tensors); + m_executable->call(m_backend_output_tensors, m_backend_input_tensors, m_eval_context); m_durations[StartPipeline] = Time::now() - start; } // ! [infer_request:start_pipeline] From a00460177466c8ab16f0ad025a134e82339a1437 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Thu, 23 Mar 2023 10:59:00 +0100 Subject: [PATCH 055/296] [ONNX FE] Fix Windows warnings (#16141) --- src/frontends/onnx/frontend/CMakeLists.txt | 6 - .../onnx/frontend/src/core/graph.cpp | 9 +- src/frontends/onnx/frontend/src/editor.cpp | 5 +- .../onnx/frontend/src/op/roi_align.cpp | 6 +- src/frontends/onnx/frontend/src/place.cpp | 4 +- src/frontends/onnx/tests/CMakeLists.txt | 7 - src/frontends/onnx/tests/onnx_import.in.cpp | 949 ++++++++------- .../tests/onnx_import_com_microsoft.in.cpp | 1053 +++++++++-------- .../tests/onnx_import_const_folding.in.cpp | 2 +- .../onnx/tests/onnx_import_controlflow.in.cpp | 25 +- .../onnx/tests/onnx_import_dyn_shapes.in.cpp | 85 +- .../tests/onnx_import_org_openvino.in.cpp | 292 ++--- .../onnx/tests/onnx_import_org_pytorch.in.cpp | 38 +- .../onnx/tests/onnx_import_quant.in.cpp | 4 +- .../onnx/tests/onnx_import_reshape.in.cpp | 46 +- .../onnx/tests/onnx_import_rnn.in.cpp | 442 +++---- .../onnx/tests/onnx_transformations.cpp | 2 +- 17 files changed, 1518 insertions(+), 1457 deletions(-) diff --git a/src/frontends/onnx/frontend/CMakeLists.txt b/src/frontends/onnx/frontend/CMakeLists.txt index 2edeaae86ec594..db84dae67ddb36 100644 --- a/src/frontends/onnx/frontend/CMakeLists.txt +++ b/src/frontends/onnx/frontend/CMakeLists.txt @@ -2,12 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4018) - ie_add_compiler_flags(/wd4244) -endif() - ov_add_frontend(NAME onnx LINKABLE_FRONTEND PROTOBUF_LITE diff --git a/src/frontends/onnx/frontend/src/core/graph.cpp b/src/frontends/onnx/frontend/src/core/graph.cpp index e940c172e87eec..5d46da8805efe1 100644 --- a/src/frontends/onnx/frontend/src/core/graph.cpp +++ b/src/frontends/onnx/frontend/src/core/graph.cpp @@ -328,7 +328,7 @@ std::shared_ptr Graph::create_function() { const auto& onnx_outputs = m_model->get_graph().output(); for (std::size_t i{0}; i < function->get_output_size(); ++i) { const auto& result_node = function->get_output_op(i); - const std::string onnx_output_name = onnx_outputs.Get(i).name(); + const std::string onnx_output_name = onnx_outputs.Get(static_cast(i)).name(); result_node->set_friendly_name(onnx_output_name + "/sink_port_0"); const auto& previous_operation = result_node->get_input_node_shared_ptr(0); 
previous_operation->set_friendly_name(onnx_output_name); @@ -386,7 +386,7 @@ OutputVector Graph::make_ng_nodes(const Node& onnx_node) { const size_t outputs_size = std::accumulate(std::begin(ng_subgraph_outputs), std::end(ng_subgraph_outputs), - 0, + static_cast(0), [](const size_t lhs, const Output& rhs) { return lhs + rhs.get_node()->get_output_size(); }); @@ -420,10 +420,11 @@ void Graph::set_friendly_names(const Node& onnx_node, const OutputVector& ng_sub const auto common_node = detail::common_node_for_all_outputs(ng_subgraph_outputs); - for (size_t i = 0; i < ng_subgraph_outputs.size(); ++i) { + const auto ng_subgraph_output_size = static_cast(ng_subgraph_outputs.size()); + for (int i = 0; i < ng_subgraph_output_size; ++i) { // Trailing optional outputs may not be specified in the ONNX model. // Other optional outputs should have name set to an empty string. - if (i >= onnx_node.get_outputs_size()) { + if (i >= static_cast(onnx_node.get_outputs_size())) { break; } diff --git a/src/frontends/onnx/frontend/src/editor.cpp b/src/frontends/onnx/frontend/src/editor.cpp index 56ba992a5c882b..af968ac16572d5 100644 --- a/src/frontends/onnx/frontend/src/editor.cpp +++ b/src/frontends/onnx/frontend/src/editor.cpp @@ -213,9 +213,10 @@ void graph_topological_sort(GraphProto* graph) { std::multimap output_name_to_node; GraphProto result; - for (int i = 0; i < graph->node().size(); ++i) { + const auto nodes_number = static_cast(graph->node().size()); + for (int i = 0; i < nodes_number; ++i) { for (const auto& output_name : graph->node(i).output()) { - output_name_to_node.emplace(output_name, graph->mutable_node(static_cast(i))); + output_name_to_node.emplace(output_name, graph->mutable_node(i)); } } auto get_node_by_out_name = [&output_name_to_node](const std::string& out_name) -> const NodeProto* { diff --git a/src/frontends/onnx/frontend/src/op/roi_align.cpp b/src/frontends/onnx/frontend/src/op/roi_align.cpp index 9b6959e126c38a..6773806fe993a7 100644 --- a/src/frontends/onnx/frontend/src/op/roi_align.cpp +++ b/src/frontends/onnx/frontend/src/op/roi_align.cpp @@ -68,9 +68,9 @@ OutputVector roi_align(const Node& node) { return {std::make_shared(data, rois, num_rois, - pooled_h, - pooled_w, - sampling_ratio, + static_cast(pooled_h), + static_cast(pooled_w), + static_cast(sampling_ratio), spatial_scale, pooling_mode, aligned_mode)}; diff --git a/src/frontends/onnx/frontend/src/place.cpp b/src/frontends/onnx/frontend/src/place.cpp index 6b67cf7efcfd03..3430bf4e6b56cb 100644 --- a/src/frontends/onnx/frontend/src/place.cpp +++ b/src/frontends/onnx/frontend/src/place.cpp @@ -317,8 +317,8 @@ ov::frontend::Place::Ptr PlaceOp::get_input_port(const std::string& input_name) std::vector PlaceOp::get_consuming_ports() const { std::vector consuming_ports; - const auto out_ports_number = m_editor->get_output_ports(m_node).size(); - for (size_t out_idx = 0; out_idx < out_ports_number; ++out_idx) { + const auto out_ports_number = static_cast(m_editor->get_output_ports(m_node).size()); + for (int out_idx = 0; out_idx < out_ports_number; ++out_idx) { auto consuming_ops_out = get_output_port(out_idx)->get_consuming_ports(); consuming_ports.insert(consuming_ports.end(), consuming_ops_out.begin(), consuming_ops_out.end()); } diff --git a/src/frontends/onnx/tests/CMakeLists.txt b/src/frontends/onnx/tests/CMakeLists.txt index ac41687b9b47f2..b6fed5f851ae43 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -6,13 +6,6 @@ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE OFF) 
ov_try_use_gold_linker() -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4244) - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4305) - ie_add_compiler_flags(/wd4756) -endif() - message(STATUS "ONNX frontend test enabled") add_compile_definitions( diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 91ce448c02c06f..b49861e1806fa7 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -469,7 +469,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_greater_or_equal_inside_if) { // expected value == x * y std::vector x(40, 2); std::vector y(40); - std::iota(y.begin(), y.end(), -20); + std::iota(y.begin(), y.end(), -20.f); std::vector expected; std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float { return i * j; @@ -489,21 +489,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_context_dependent_function) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{3, 5}, - {0.54881352186203, - 0.7151893377304077, - 0.6027633547782898, - 0.5448831915855408, - 0.42365479469299316, - 0.6458941102027893, - 0.4375872015953064, - 0.891772985458374, - 0.9636627435684204, - 0.3834415078163147, - 0.7917250394821167, - 0.5288949012756348, - 0.5680445432662964, - 0.9255966544151306, - 0.07103605568408966}); + {0.54881352186203f, + 0.7151893377304077f, + 0.6027633547782898f, + 0.5448831915855408f, + 0.42365479469299316f, + 0.6458941102027893f, + 0.4375872015953064f, + 0.891772985458374f, + 0.9636627435684204f, + 0.3834415078163147f, + 0.7917250394821167f, + 0.5288949012756348f, + 0.5680445432662964f, + 0.9255966544151306f, + 0.07103605568408966f}); test_case.add_input(Shape{3}, {1, 4, 3}); test_case.add_expected_output(Shape{}, {1}); test_case.run(); @@ -797,27 +797,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0, 0.0, 1.0}); - test_case.add_expected_output({0.09003058, 0.24472848, 0.66524094}); + test_case.add_expected_output({0.09003058f, 0.24472848f, 0.66524094f}); test_case.run(); } namespace { // common input for all Softmax 3D test cases (Shape = {3,4,5}) // clang-format off const std::vector SOFTMAX_INPUT = { - 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, - 0.21004745, 1.38337255, 1.19030397, 2.0940445, -0.03551657, - -0.78686039, 1.992782, 0.04300319, -0.29230777, -0.56797112, - -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, - - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, - -0.13259761, -1.14313018, 0.2673723, -0.87996154, 1.29053106, - 1.55, 0.8396538, 1.20729817, 0.23727845, -0.89113606, - -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, - - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, - 0.75425957, -2.43721014, -1.24478184, 2.65316853, 1.19509542, - -0.95523998, 0.5149006, -0.01151649, 0.68327026, -0.4589638, - -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + 2.75793882f, -0.50841322f, 0.82013929f, -0.62409912f, -0.96136118f, + 0.21004745f, 1.38337255f, 1.19030397f, 2.0940445f, -0.03551657f, + -0.78686039f, 1.992782f, 0.04300319f, -0.29230777f, -0.56797112f, + -1.26732165f, -0.61935399f, 0.57670432f, 0.92844898f, 2.82469233f, + + 0.98721677f, -0.05100663f, -1.21178917f, -0.17530157f, 1.40051805f, + -0.13259761f, -1.14313018f, 0.2673723f, -0.87996154f, 1.29053106f, + 1.55f, 0.8396538f, 1.20729817f, 
0.23727845f, -0.89113606f, + -1.70909842f, 0.26460363f, -0.70566808f, 2.383518f, 1.07024615f, + + -1.21722605f, 0.82919357f, 0.55765697f, 0.12657686f, 0.63432172f, + 0.75425957f, -2.43721014f, -1.24478184f, 2.65316853f, 1.19509542f, + -0.95523998f, 0.5149006f, -0.01151649f, 0.68327026f, -0.4589638f, + -0.46554745f, 0.21055324f, 0.39266729f, 2.05098086f, 1.83207919f}; } // namespace // clang-format on @@ -831,20 +831,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, - 0.00757665, 0.02449322, 0.02019284, 0.04985249, 0.00592694, - 0.00279593, 0.04505148, 0.00641108, 0.00458466, 0.00348007, - 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497, - - 0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, - 0.00537859, 0.00195794, 0.00802367, 0.00254737, 0.0223216, - 0.02893419, 0.0142204, 0.02053893, 0.00778581, 0.00251907, - 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084, - - 0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, - 0.01305647, 0.00053677, 0.0017687, 0.08719896, 0.02028982, - 0.00236265, 0.01027717, 0.0060709, 0.01216173, 0.00388087, - 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337}); + {0.09683057f, 0.00369363f, 0.01394559f, 0.00329012f, 0.00234823f, + 0.00757665f, 0.02449322f, 0.02019284f, 0.04985249f, 0.00592694f, + 0.00279593f, 0.04505148f, 0.00641108f, 0.00458466f, 0.00348007f, + 0.00172928f, 0.00330577f, 0.01093237f, 0.01554086f, 0.10351497f, + + 0.01648154f, 0.00583583f, 0.00182802f, 0.00515374f, 0.02491679f, + 0.00537859f, 0.00195794f, 0.00802367f, 0.00254737f, 0.0223216f, + 0.02893419f, 0.0142204f, 0.02053893f, 0.00778581f, 0.00251907f, + 0.00111174f, 0.00800149f, 0.0030324f, 0.06658917f, 0.0179084f, + + 0.00181811f, 0.01407243f, 0.01072611f, 0.0069699f, 0.01158077f, + 0.01305647f, 0.00053677f, 0.0017687f, 0.08719896f, 0.02028982f, + 0.00236265f, 0.01027717f, 0.0060709f, 0.01216173f, 0.00388087f, + 0.00385541f, 0.00758048f, 0.00909469f, 0.04775123f, 0.03836337f}); // clang-format on test_case.run(6); @@ -860,20 +860,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, - 0.0178066, 0.05756383, 0.04745709, 0.11716303, 0.01392945, - 0.00657097, 0.10587974, 0.01506727, 0.01077484, 0.00817884, - 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028, - - 0.06217413, 0.02201481, 0.00689594, 0.01944171, 0.09399488, - 0.02028993, 0.00738604, 0.03026811, 0.00960958, 0.08420492, - 0.10914991, 0.05364435, 0.07748005, 0.02937079, 0.0095028, - 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678, - - 0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, - 0.04219705, 0.00173478, 0.00571623, 0.2818174, 0.06557446, - 0.00763582, 0.03321466, 0.01962049, 0.03930537, 0.01254255, - 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617}); + {0.22757064f, 0.00868076f, 0.03277484f, 0.00773243f, 0.0055188f, + 0.0178066f, 0.05756383f, 0.04745709f, 0.11716303f, 0.01392945f, + 0.00657097f, 0.10587974f, 0.01506727f, 0.01077484f, 0.00817884f, + 0.00406413f, 0.00776921f, 0.0256932f, 0.03652405f, 0.24328028f, + + 0.06217413f, 0.02201481f, 0.00689594f, 0.01944171f, 0.09399488f, + 0.02028993f, 0.00738604f, 0.03026811f, 0.00960958f, 0.08420492f, + 0.10914991f, 0.05364435f, 0.07748005f, 0.02937079f, 0.0095028f, + 0.00419387f, 0.03018442f, 0.01143929f, 0.2511977f, 
0.06755678f, + + 0.00587593f, 0.04548053f, 0.0346656f, 0.02252594f, 0.03742775f, + 0.04219705f, 0.00173478f, 0.00571623f, 0.2818174f, 0.06557446f, + 0.00763582f, 0.03321466f, 0.01962049f, 0.03930537f, 0.01254255f, + 0.01246025f, 0.02449929f, 0.02939305f, 0.15432668f, 0.12398617f}); // clang-format on test_case.run(4); @@ -890,20 +890,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1_opset11) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, - 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, - 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, - 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, - - 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, - 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, - 0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801, - 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, - - 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, - 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, - 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, - 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + {0.88890495f, 0.04825497f, 0.27088348f, 0.04490523f, 0.02037154f, + 0.06955369f, 0.31998834f, 0.39223197f, 0.68041159f, 0.05141776f, + 0.02566661f, 0.5885689f, 0.12453075f, 0.06257374f, 0.03019055f, + 0.01587475f, 0.0431878f, 0.21235381f, 0.21210944f, 0.89802015f, + + 0.31752626f, 0.19442629f, 0.0546935f, 0.06279221f, 0.36823282f, + 0.10362164f, 0.06523066f, 0.24006419f, 0.03103672f, 0.32987983f, + 0.55743381f, 0.473766f, 0.61451431f, 0.09486084f, 0.03722801f, + 0.02141829f, 0.26657706f, 0.090728f, 0.81131024f, 0.26465935f, + + 0.08619648f, 0.43343993f, 0.3877785f, 0.04523505f, 0.15625437f, + 0.61900597f, 0.01653285f, 0.06394322f, 0.56592636f, 0.27376196f, + 0.11201305f, 0.31654337f, 0.21947994f, 0.07893034f, 0.05236297f, + 0.18278451f, 0.23348385f, 0.32879834f, 0.30990825f, 0.5176207f}); // clang-format on test_case.run(4); @@ -920,20 +920,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset11) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619484, 0.03075256, 0.1161086, 0.027393, 0.01955098, - 0.07012683, 0.22670066, 0.18689778, 0.4614171, 0.05485764, - 0.04486171, 0.7228683, 0.10286818, 0.07356264, 0.05583908, - 0.01280724, 0.02448298, 0.08096659, 0.11509769, 0.76664555, - - 0.30399805, 0.10764059, 0.03371745, 0.09505949, 0.4595844, - 0.13369875, 0.04866969, 0.19944906, 0.0633215, 0.554861, - 0.39101103, 0.19217177, 0.27755913, 0.10521588, 0.03404216, - 0.01150354, 0.08279411, 0.03137731, 0.6890207, 0.18530433, - - 0.0402528, 0.31156224, 0.23747502, 0.15431291, 0.25639707, - 0.10627912, 0.00436928, 0.01439711, 0.7097961, 0.16515835, - 0.06798343, 0.29571748, 0.17468554, 0.34994435, 0.11166911, - 0.03615172, 0.07108136, 0.08527993, 0.4477579, 0.35972902}); + {0.80619484f, 0.03075256f, 0.1161086f, 0.027393f, 0.01955098f, + 0.07012683f, 0.22670066f, 0.18689778f, 0.4614171f, 0.05485764f, + 0.04486171f, 0.7228683f, 0.10286818f, 0.07356264f, 0.05583908f, + 0.01280724f, 0.02448298f, 0.08096659f, 0.11509769f, 0.76664555f, + + 0.30399805f, 0.10764059f, 0.03371745f, 0.09505949f, 0.4595844f, + 0.13369875f, 0.04866969f, 0.19944906f, 0.0633215f, 0.554861f, + 0.39101103f, 0.19217177f, 0.27755913f, 0.10521588f, 0.03404216f, + 0.01150354f, 0.08279411f, 0.03137731f, 0.6890207f, 0.18530433f, + + 0.0402528f, 0.31156224f, 0.23747502f, 0.15431291f, 0.25639707f, 
+ 0.10627912f, 0.00436928f, 0.01439711f, 0.7097961f, 0.16515835f, + 0.06798343f, 0.29571748f, 0.17468554f, 0.34994435f, 0.11166911f, + 0.03615172f, 0.07108136f, 0.08527993f, 0.4477579f, 0.35972902f}); // clang-format on test_case.run(6); @@ -950,20 +950,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset13) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619484, 0.03075256, 0.1161086, 0.027393, 0.01955098, - 0.07012683, 0.22670066, 0.18689778, 0.4614171, 0.05485764, - 0.04486171, 0.7228683, 0.10286818, 0.07356264, 0.05583908, - 0.01280724, 0.02448298, 0.08096659, 0.11509769, 0.76664555, - - 0.30399805, 0.10764059, 0.03371745, 0.09505949, 0.4595844, - 0.13369875, 0.04866969, 0.19944906, 0.0633215, 0.554861, - 0.39101103, 0.19217177, 0.27755913, 0.10521588, 0.03404216, - 0.01150354, 0.08279411, 0.03137731, 0.6890207, 0.18530433, - - 0.0402528, 0.31156224, 0.23747502, 0.15431291, 0.25639707, - 0.10627912, 0.00436928, 0.01439711, 0.7097961, 0.16515835, - 0.06798343, 0.29571748, 0.17468554, 0.34994435, 0.11166911, - 0.03615172, 0.07108136, 0.08527993, 0.4477579, 0.35972902}); + {0.80619484f, 0.03075256f, 0.1161086f, 0.027393f, 0.01955098f, + 0.07012683f, 0.22670066f, 0.18689778f, 0.4614171f, 0.05485764f, + 0.04486171f, 0.7228683f, 0.10286818f, 0.07356264f, 0.05583908f, + 0.01280724f, 0.02448298f, 0.08096659f, 0.11509769f, 0.76664555f, + + 0.30399805f, 0.10764059f, 0.03371745f, 0.09505949f, 0.4595844f, + 0.13369875f, 0.04866969f, 0.19944906f, 0.0633215f, 0.554861f, + 0.39101103f, 0.19217177f, 0.27755913f, 0.10521588f, 0.03404216f, + 0.01150354f, 0.08279411f, 0.03137731f, 0.6890207f, 0.18530433f, + + 0.0402528f, 0.31156224f, 0.23747502f, 0.15431291f, 0.25639707f, + 0.10627912f, 0.00436928f, 0.01439711f, 0.7097961f, 0.16515835f, + 0.06798343f, 0.29571748f, 0.17468554f, 0.34994435f, 0.11166911f, + 0.03615172f, 0.07108136f, 0.08527993f, 0.4477579f, 0.35972902f}); // clang-format on test_case.run(6); @@ -1786,7 +1786,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_down_sizes_cubic_half_pixel) { test_case.add_input(input_data); test_case.add_expected_output( expected_output_shape, - {1.6307871, 3.0046299, 4.3784733, 7.1261587, 8.5, 9.873844, 12.621532, 13.995373, 15.369216}); + {1.6307871f, 3.0046299f, 4.3784733f, 7.1261587f, 8.5f, 9.873844f, 12.621532f, 13.995373f, 15.369216f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -1848,18 +1848,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_up_sizes_cubic_half_pixel) { test_case.add_input(input_data); test_case.add_expected_output( expected_output_shape, - {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922, 2.22332922f, 2.70807922f, 3.15907922f, - 3.49007922f, 3.67557922, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963, 3.16262963f, - 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, - 4.40355693, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693, 5.20525069f, 5.39075069f, - 5.72175069f, 6.17275069f, 6.65750069, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069, - 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342, 8.658f, 9.14275f, 9.59375f, - 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931, 10.34249931f, - 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, - 12.28044307, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307, 12.38512037f, 12.57062037f, - 12.90162037f, 13.35262037f, 
13.83737037, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037, - 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078, 15.09267078f, 15.57742078f, 16.02842078f, - 16.35942078f, 16.54492078}); + {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922f, 2.22332922f, 2.70807922f, 3.15907922f, + 3.49007922f, 3.67557922f, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963f, 3.16262963f, + 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963f, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, + 4.40355693f, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693f, 5.20525069f, 5.39075069f, + 5.72175069f, 6.17275069f, 6.65750069f, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069f, + 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342f, 8.658f, 9.14275f, 9.59375f, + 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931f, 10.34249931f, + 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931f, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, + 12.28044307f, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307f, 12.38512037f, 12.57062037f, + 12.90162037f, 13.35262037f, 13.83737037f, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037f, + 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078f, 15.09267078f, 15.57742078f, 16.02842078f, + 16.35942078f, 16.54492078f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -1891,18 +1891,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_up_sizes_cubic_half_pixel_dynamic_siz test_case.add_input(std::vector{1, 1, 9, 10}); // sizes test_case.add_expected_output( expected_output_shape, - {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922, 2.22332922f, 2.70807922f, 3.15907922f, - 3.49007922f, 3.67557922, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963, 3.16262963f, - 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, - 4.40355693, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693, 5.20525069f, 5.39075069f, - 5.72175069f, 6.17275069f, 6.65750069, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069, - 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342, 8.658f, 9.14275f, 9.59375f, - 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931, 10.34249931f, - 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, - 12.28044307, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307, 12.38512037f, 12.57062037f, - 12.90162037f, 13.35262037f, 13.83737037, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037, - 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078, 15.09267078f, 15.57742078f, 16.02842078f, - 16.35942078f, 16.54492078}); + {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922f, 2.22332922f, 2.70807922f, 3.15907922f, + 3.49007922f, 3.67557922f, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963f, 3.16262963f, + 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963f, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, + 4.40355693f, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693f, 5.20525069f, 5.39075069f, + 5.72175069f, 6.17275069f, 6.65750069f, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069f, + 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342f, 8.658f, 9.14275f, 9.59375f, + 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 
10.02649931f, 10.34249931f, + 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931f, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, + 12.28044307f, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307f, 12.38512037f, 12.57062037f, + 12.90162037f, 13.35262037f, 13.83737037f, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037f, + 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078f, 15.09267078f, 15.57742078f, 16.02842078f, + 16.35942078f, 16.54492078f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -2177,7 +2177,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_batch_nd_elementwise) { // Shape{2, 3, 4, 5} std::vector slope(shape_size(Shape{2, 3, 4, 5})); - std::iota(std::begin(slope), std::end(slope), 0); + std::iota(std::begin(slope), std::end(slope), 0.f); inputs.emplace_back(slope); // Shape{2, 3, 4, 5} @@ -2426,19 +2426,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softplus) { -FLT_MAX}}; const auto inf = std::numeric_limits::infinity(); - std::vector output{0.3132616579532623291, - 0.6931471824645996094, - 1.313261628150939941, - 10.0000457763671875, - 100.0, - 0.0, - 1000.0, - 0.0, - 0.6931471824645996094, - 0.6931471824645996094, - 0.6931471824645996094, + std::vector output{0.3132616579532623291f, + 0.6931471824645996094f, + 1.313261628150939941f, + 10.0000457763671875f, + 100.0f, + 0.0f, + 1000.0f, + 0.0f, + 0.6931471824645996094f, + 0.6931471824645996094f, + 0.6931471824645996094f, inf, - 0.0}; + 0.0f}; auto test_case = test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2507,7 +2507,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmax_float) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/argmax_float.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({4, 0.1, 2, 3, -3, 1, -0.9, 0, 1, 2, 3, 0}); + test_case.add_input({4.f, 0.1f, 2.f, 3.f, -3.f, 1.f, -0.9f, 0.f, 1.f, 2.f, 3.f, 0.f}); test_case.add_expected_output({0, 3, 0}); test_case.run(); } @@ -2517,7 +2517,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmin_float) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/argmin_float.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({4, 0.1, 2, 3, -3, 1, -0.9, 0, 1, 2, 3, 0}); + test_case.add_input({4.f, 0.1f, 2.f, 3.f, -3.f, 1.f, -0.9f, 0.f, 1.f, 2.f, 3.f, 0.f}); test_case.add_expected_output({1, 1, 0, 2}); test_case.run(); } @@ -2528,7 +2528,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmax_select_last_index) { "onnx/argmax_select_last_index.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3}, {1, 1, 1, 0.5, 3, 4, 0.5, 1, 1.1, 0, 3, 0}); + test_case.add_input(Shape{4, 3}, {1.f, 1.f, 1.f, 0.5f, 3.f, 4.f, 0.5f, 1.f, 1.1f, 0.f, 3.f, 0.f}); test_case.add_expected_output(Shape{1, 3}, {0, 3, 1}); test_case.run(); } @@ -2539,7 +2539,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmin_select_last_index) { "onnx/argmin_select_last_index.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3}, {1, 1, 1, 2, 3, 4, 2, 1, 1.1, 3, 3, 8}); + test_case.add_input(Shape{4, 3}, {1.f, 1.f, 1.f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.1f, 3.f, 3.f, 8.f}); test_case.add_expected_output(Shape{4}, {2, 0, 1, 1}); test_case.run(); } @@ -2736,10 +2736,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_one_hot_with_axis) { auto function = onnx_import::import_onnx_model( 
file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/one_hot_axis.onnx")); - Inputs inputs{{1.0, 9.0, 2.0, 4.0}, {1.0, 3.0}}; - std::vector expected_output{{1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, - 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}}; + Inputs inputs{{1.0f, 9.0f, 2.0f, 4.0f}, {1.0f, 3.0f}}; + std::vector expected_output{{1.0f, 1.0f, 3.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}}; auto test_case = test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2851,7 +2852,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_p1) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2870,7 +2871,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_p2) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2889,7 +2890,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_default) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2909,7 +2910,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_default_dynamic) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data_shape, data); @@ -2928,7 +2929,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -2953,7 +2954,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_instance_normalization_dynamic) { std::vector input_data{1.f, 2.f, 3.f}; test_case.add_input(Shape{1, 3, 1, 1}, input_data); test_case.add_expected_output(Shape{1, 3, 1, 1}, - {0.3341970741748809814, 0.3321160078048706055, 0.3407136797904968262}); + {0.3341970741748809814f, 0.3321160078048706055f, 0.3407136797904968262f}); test_case.run(); } @@ -3121,9 +3122,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_sign_fmod_f32) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/mod_sign_fmod_f32.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({-4.3, 7.2, 5.0, 4.3, -7.2, 8.0}); - test_case.add_input({2.1, -3.4, 8.0, -2.1, 3.4, 5.0}); - test_case.add_expected_output(Shape{6}, {-0.10000038, 0.39999962, 5., 0.10000038, -0.39999962, 3.}); + test_case.add_input({-4.3f, 7.2f, 5.0f, 4.3f, -7.2f, 8.0f}); + test_case.add_input({2.1f, -3.4f, 8.0f, -2.1f, 3.4f, 5.0f}); + test_case.add_expected_output(Shape{6}, {-0.10000038f, 0.39999962f, 5.f, 
0.10000038f, -0.39999962f, 3.f}); test_case.run(); } @@ -3280,22 +3281,22 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_gather_float_2D_neg_indices) { // clang-format off test_case.add_input(Shape{3, 3}, - { 0.0, 0.1, 0.2, - 1.0, 1.1, 1.2, - 2.0, 2.1, 2.2 }); + { 0.0f, 0.1f, 0.2f, + 1.0f, 1.1f, 1.2f, + 2.0f, 2.1f, 2.2f }); test_case.add_input(Shape{2, 2}, { -1, -2, -3, -2 }); test_case.add_expected_output(Shape{3, 2, 2}, { - 0.2, 0.1, - 0.0, 0.1, + 0.2f, 0.1f, + 0.0f, 0.1f, - 1.2, 1.1, - 1.0, 1.1, + 1.2f, 1.1f, + 1.0f, 1.1f, - 2.2, 2.1, - 2.0, 2.1 }); + 2.2f, 2.1f, + 2.0f, 2.1f }); // clang-format on test_case.run(); @@ -3633,10 +3634,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample8_nearest_infer) { const Shape expected_output_shape{1, 1, 4, 6}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(expected_output_shape, - {1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, - 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0}); + {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}); test_case.run(); } @@ -3650,10 +3651,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample8_linear_infer) { const Shape expected_output_shape{1, 1, 4, 4}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output( expected_output_shape, - {1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 3.0, 3.5, 4.0, 4.0, 3.0, 3.5, 4.0, 4.0}); + {1.0f, 1.5f, 2.0f, 2.0f, 2.0f, 2.5f, 3.0f, 3.0f, 3.0f, 3.5f, 4.0f, 4.0f, 3.0f, 3.5f, 4.0f, 4.0f}); test_case.run(); } @@ -3669,10 +3670,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample9_scales_const_nearest_infer) { const Shape expected_output_shape{1, 1, 4, 6}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(expected_output_shape, - {1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, - 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0}); + {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}); test_case.run(); } @@ -3688,10 +3689,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample9_scales_const_linear_infer) { const Shape expected_output_shape{1, 1, 4, 4}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output( expected_output_shape, - {1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 3.0, 3.5, 4.0, 4.0, 3.0, 3.5, 4.0, 4.0}); + {1.0f, 1.5f, 2.0f, 2.0f, 2.0f, 2.5f, 3.0f, 3.0f, 3.0f, 3.5f, 4.0f, 4.0f, 3.0f, 3.5f, 4.0f, 4.0f}); test_case.run(); } @@ -3700,8 +3701,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_image_scaler) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/image_scaler.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0}); - test_case.add_expected_output(Shape{1, 2, 2, 2}, {12.0, 14.0, 16.0, 18.0, 21.0, 41.0, 61.0, 81.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f}); + test_case.add_expected_output(Shape{1, 2, 2, 2}, {12.0f, 14.0f, 16.0f, 18.0f, 21.0f, 41.0f, 
61.0f, 81.0f}); test_case.run(); } @@ -3710,7 +3711,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_single) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/size_op_single.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}); + test_case.add_input(Shape{2, 3}, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}); test_case.add_expected_output(Shape{}, {6}); test_case.run(); } @@ -3720,7 +3721,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_graph_end) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/size_op_graph_end.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(Shape{}, {4}); test_case.run(); } @@ -3731,8 +3732,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_graph_middle) { "onnx/size_op_graph_middle.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); - test_case.add_expected_output(Shape{}, {4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); + test_case.add_expected_output(Shape{}, {4.0f}); test_case.run(); } @@ -3743,11 +3744,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_on_input_graph_middle) { "onnx/size_op_on_input_graph_middle.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 2, 4, 1, 3}, {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}); + test_case.add_input(Shape{1, 2, 4, 1, 3}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); test_case.add_expected_output(Shape{1, 2, 4, 1, 3}, - {24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., - 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24.}); + {24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, + 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f}); test_case.run(); } @@ -3818,36 +3819,39 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_roialign16_avg_out_half_pixel) { auto test_case = test::TestCase(function, s_device); test_case.add_input( - {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 11., 12.1, 13.2, 14.3, 15.4, 16.5, 17.6, - 18.7, 19.8, 20.9, 22., 23.1, 24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33., 34.1, 35.2, - 36.3, 37.4, 38.5, 39.6, 40.7, 41.8, 42.9, 44., 45.1, 46.2, 47.3, 48.4, 49.5, 50.6, 51.7, 52.8, - 53.9, 55., 56.1, 57.2, 58.3, 59.4, 60.5, 61.6, 62.7, 63.8, 64.9, 66., 67.1, 68.2, 69.3, 70.4, - 71.5, 72.6, 73.7, 74.8, 75.9, 77., 78.1, 79.2, 80.3, 81.4, 82.5, 83.6, 84.7, 85.8, 86.9, 88., - 89.1, 90.2, 91.3, 92.4, 93.5, 94.6, 95.7, 96.8, 97.9, 99., 100.1, 101.2, 102.3, 103.4, 104.5, 105.6, - 106.7, 107.8, 108.9, 110., 111.1, 112.2, 113.3, 114.4, 115.5, 116.6, 117.7, 118.8, 119.9, 121., 122.1, 123.2, - 124.3, 125.4, 126.5, 127.6, 128.7, 129.8, 130.9, 132., 133.1, 134.2, 135.3, 136.4, 137.5, 138.6, 139.7, 140.8, - 141.9, 143., 144.1, 145.2, 146.3, 147.4, 148.5, 149.6, 150.7, 151.8, 152.9, 154., 155.1, 156.2, 157.3, 158.4, - 159.5, 160.6, 161.7, 162.8, 163.9, 165., 166.1, 167.2, 168.3, 169.4, 170.5, 171.6, 172.7, 173.8, 174.9, 176., - 177.1, 178.2, 179.3, 180.4, 181.5, 182.6, 183.7, 184.8, 185.9, 187., 188.1, 189.2, 190.3, 191.4, 192.5, 193.6, - 194.7, 195.8, 196.9, 198., 199.1, 200.2, 201.3, 202.4, 203.5, 204.6, 205.7, 206.8, 207.9, 209., 210.1, 211.2, - 212.3, 213.4, 214.5, 
215.6, 216.7, 217.8, 218.9, 220., 221.1, 222.2, 223.3, 224.4, 225.5, 226.6, 227.7, 228.8, - 229.9, 231., 232.1, 233.2, 234.3, 235.4, 236.5, 237.6}); - - test_case.add_input({0, 0, 0.75, 2.2, 1.2, 0.5, 2.8, 1.9, 0, 3, 0, 3}); + {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 11.f, 12.1f, 13.2f, 14.3f, 15.4f, + 16.5f, 17.6f, 18.7f, 19.8f, 20.9f, 22.f, 23.1f, 24.2f, 25.3f, 26.4f, 27.5f, 28.6f, 29.7f, 30.8f, + 31.9f, 33.f, 34.1f, 35.2f, 36.3f, 37.4f, 38.5f, 39.6f, 40.7f, 41.8f, 42.9f, 44.f, 45.1f, 46.2f, + 47.3f, 48.4f, 49.5f, 50.6f, 51.7f, 52.8f, 53.9f, 55.f, 56.1f, 57.2f, 58.3f, 59.4f, 60.5f, 61.6f, + 62.7f, 63.8f, 64.9f, 66.f, 67.1f, 68.2f, 69.3f, 70.4f, 71.5f, 72.6f, 73.7f, 74.8f, 75.9f, 77.f, + 78.1f, 79.2f, 80.3f, 81.4f, 82.5f, 83.6f, 84.7f, 85.8f, 86.9f, 88.f, 89.1f, 90.2f, 91.3f, 92.4f, + 93.5f, 94.6f, 95.7f, 96.8f, 97.9f, 99.f, 100.1f, 101.2f, 102.3f, 103.4f, 104.5f, 105.6f, 106.7f, 107.8f, + 108.9f, 110.f, 111.1f, 112.2f, 113.3f, 114.4f, 115.5f, 116.6f, 117.7f, 118.8f, 119.9f, 121.f, 122.1f, 123.2f, + 124.3f, 125.4f, 126.5f, 127.6f, 128.7f, 129.8f, 130.9f, 132.f, 133.1f, 134.2f, 135.3f, 136.4f, 137.5f, 138.6f, + 139.7f, 140.8f, 141.9f, 143.f, 144.1f, 145.2f, 146.3f, 147.4f, 148.5f, 149.6f, 150.7f, 151.8f, 152.9f, 154.f, + 155.1f, 156.2f, 157.3f, 158.4f, 159.5f, 160.6f, 161.7f, 162.8f, 163.9f, 165.f, 166.1f, 167.2f, 168.3f, 169.4f, + 170.5f, 171.6f, 172.7f, 173.8f, 174.9f, 176.f, 177.1f, 178.2f, 179.3f, 180.4f, 181.5f, 182.6f, 183.7f, 184.8f, + 185.9f, 187.f, 188.1f, 189.2f, 190.3f, 191.4f, 192.5f, 193.6f, 194.7f, 195.8f, 196.9f, 198.f, 199.1f, 200.2f, + 201.3f, 202.4f, 203.5f, 204.6f, 205.7f, 206.8f, 207.9f, 209.f, 210.1f, 211.2f, 212.3f, 213.4f, 214.5f, 215.6f, + 216.7f, 217.8f, 218.9f, 220.f, 221.1f, 222.2f, 223.3f, 224.4f, 225.5f, 226.6f, 227.7f, 228.8f, 229.9f, 231.f, + 232.1f, 233.2f, 234.3f, 235.4f, 236.5f, 237.6f}); + + test_case.add_input({0.f, 0.f, 0.75f, 2.2f, 1.2f, 0.5f, 2.8f, 1.9f, 0.f, 3.f, 0.f, 3.f}); test_case.add_input({0, 2, 1}); test_case.add_expected_output( Shape{3, 2, 4, 4}, - {2.145, 2.42, 2.6950002, 2.9700003, 3.96, 4.235, 4.51, 4.7850003, 5.775, 6.05, - 6.325, 6.6000004, 7.59, 7.8650007, 8.14, 8.415001, 41.745003, 42.019997, 42.295, 42.57, - 43.56, 43.835, 44.11, 44.385002, 45.375, 45.65, 45.925003, 46.200005, 47.190002, 47.465004, - 47.74, 48.015, 162.77249, 163.0475, 163.32251, 163.5975, 164.42252, 164.69751, 164.9725, 165.2475, - 166.07251, 166.3475, 166.6225, 166.8975, 167.72249, 167.9975, 168.27249, 168.5475, 202.3725, 202.6475, - 202.9225, 203.19751, 204.02252, 204.2975, 204.57251, 204.8475, 205.6725, 205.94751, 206.2225, 206.4975, - 207.32251, 207.5975, 207.8725, 208.1475, 91.162506, 91.4375, 91.7125, 91.9875, 92.8125, 93.0875, - 93.3625, 93.6375, 94.4625, 94.7375, 95.0125, 95.28749, 96.1125, 96.3875, 96.6625, 96.9375, - 130.76251, 131.0375, 131.3125, 131.5875, 132.4125, 132.6875, 132.9625, 133.2375, 134.0625, 134.33751, - 134.6125, 134.88751, 135.7125, 135.9875, 136.26251, 136.53749}); + {2.145f, 2.42f, 2.6950002f, 2.9700003f, 3.96f, 4.235f, 4.51f, 4.7850003f, 5.775f, + 6.05f, 6.325f, 6.6000004f, 7.59f, 7.8650007f, 8.14f, 8.415001f, 41.745003f, 42.019997f, + 42.295f, 42.57f, 43.56f, 43.835f, 44.11f, 44.385002f, 45.375f, 45.65f, 45.925003f, + 46.200005f, 47.190002f, 47.465004f, 47.74f, 48.015f, 162.77249f, 163.0475f, 163.32251f, 163.5975f, + 164.42252f, 164.69751f, 164.9725f, 165.2475f, 166.07251f, 166.3475f, 166.6225f, 166.8975f, 167.72249f, + 167.9975f, 168.27249f, 168.5475f, 202.3725f, 202.6475f, 202.9225f, 203.19751f, 204.02252f, 204.2975f, + 
204.57251f, 204.8475f, 205.6725f, 205.94751f, 206.2225f, 206.4975f, 207.32251f, 207.5975f, 207.8725f,
+ 208.1475f, 91.162506f, 91.4375f, 91.7125f, 91.9875f, 92.8125f, 93.0875f, 93.3625f, 93.6375f,
+ 94.4625f, 94.7375f, 95.0125f, 95.28749f, 96.1125f, 96.3875f, 96.6625f, 96.9375f, 130.76251f,
+ 131.0375f, 131.3125f, 131.5875f, 132.4125f, 132.6875f, 132.9625f, 133.2375f, 134.0625f, 134.33751f,
+ 134.6125f, 134.88751f, 135.7125f, 135.9875f, 136.26251f, 136.53749f});

 test_case.run();
 }

@@ -3858,36 +3862,40 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_roialign16_avg_half_pixel) {

 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(
- {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 11., 12.1, 13.2, 14.3, 15.4, 16.5, 17.6,
- 18.7, 19.8, 20.9, 22., 23.1, 24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33., 34.1, 35.2,
- 36.3, 37.4, 38.5, 39.6, 40.7, 41.8, 42.9, 44., 45.1, 46.2, 47.3, 48.4, 49.5, 50.6, 51.7, 52.8,
- 53.9, 55., 56.1, 57.2, 58.3, 59.4, 60.5, 61.6, 62.7, 63.8, 64.9, 66., 67.1, 68.2, 69.3, 70.4,
- 71.5, 72.6, 73.7, 74.8, 75.9, 77., 78.1, 79.2, 80.3, 81.4, 82.5, 83.6, 84.7, 85.8, 86.9, 88.,
- 89.1, 90.2, 91.3, 92.4, 93.5, 94.6, 95.7, 96.8, 97.9, 99., 100.1, 101.2, 102.3, 103.4, 104.5, 105.6,
- 106.7, 107.8, 108.9, 110., 111.1, 112.2, 113.3, 114.4, 115.5, 116.6, 117.7, 118.8, 119.9, 121., 122.1, 123.2,
- 124.3, 125.4, 126.5, 127.6, 128.7, 129.8, 130.9, 132., 133.1, 134.2, 135.3, 136.4, 137.5, 138.6, 139.7, 140.8,
- 141.9, 143., 144.1, 145.2, 146.3, 147.4, 148.5, 149.6, 150.7, 151.8, 152.9, 154., 155.1, 156.2, 157.3, 158.4,
- 159.5, 160.6, 161.7, 162.8, 163.9, 165., 166.1, 167.2, 168.3, 169.4, 170.5, 171.6, 172.7, 173.8, 174.9, 176.,
- 177.1, 178.2, 179.3, 180.4, 181.5, 182.6, 183.7, 184.8, 185.9, 187., 188.1, 189.2, 190.3, 191.4, 192.5, 193.6,
- 194.7, 195.8, 196.9, 198., 199.1, 200.2, 201.3, 202.4, 203.5, 204.6, 205.7, 206.8, 207.9, 209., 210.1, 211.2,
- 212.3, 213.4, 214.5, 215.6, 216.7, 217.8, 218.9, 220., 221.1, 222.2, 223.3, 224.4, 225.5, 226.6, 227.7, 228.8,
- 229.9, 231., 232.1, 233.2, 234.3, 235.4, 236.5, 237.6});
-
- test_case.add_input({0, 0, 0.75, 2.2, 1.2, 0.5, 2.8, 1.9, 0, 3, 0, 3});
+ {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 11.f, 12.1f, 13.2f, 14.3f,
+ 15.4f, 16.5f, 17.6f, 18.7f, 19.8f, 20.9f, 22.f, 23.1f, 24.2f, 25.3f, 26.4f, 27.5f, 28.6f,
+ 29.7f, 30.8f, 31.9f, 33.f, 34.1f, 35.2f, 36.3f, 37.4f, 38.5f, 39.6f, 40.7f, 41.8f, 42.9f,
+ 44.f, 45.1f, 46.2f, 47.3f, 48.4f, 49.5f, 50.6f, 51.7f, 52.8f, 53.9f, 55.f, 56.1f, 57.2f,
+ 58.3f, 59.4f, 60.5f, 61.6f, 62.7f, 63.8f, 64.9f, 66.f, 67.1f, 68.2f, 69.3f, 70.4f, 71.5f,
+ 72.6f, 73.7f, 74.8f, 75.9f, 77.f, 78.1f, 79.2f, 80.3f, 81.4f, 82.5f, 83.6f, 84.7f, 85.8f,
+ 86.9f, 88.f, 89.1f, 90.2f, 91.3f, 92.4f, 93.5f, 94.6f, 95.7f, 96.8f, 97.9f, 99.f, 100.1f,
+ 101.2f, 102.3f, 103.4f, 104.5f, 105.6f, 106.7f, 107.8f, 108.9f, 110.f, 111.1f, 112.2f, 113.3f, 114.4f,
+ 115.5f, 116.6f, 117.7f, 118.8f, 119.9f, 121.f, 122.1f, 123.2f, 124.3f, 125.4f, 126.5f, 127.6f, 128.7f,
+ 129.8f, 130.9f, 132.f, 133.1f, 134.2f, 135.3f, 136.4f, 137.5f, 138.6f, 139.7f, 140.8f, 141.9f, 143.f,
+ 144.1f, 145.2f, 146.3f, 147.4f, 148.5f, 149.6f, 150.7f, 151.8f, 152.9f, 154.f, 155.1f, 156.2f, 157.3f,
+ 158.4f, 159.5f, 160.6f, 161.7f, 162.8f, 163.9f, 165.f, 166.1f, 167.2f, 168.3f, 169.4f, 170.5f, 171.6f,
+ 172.7f, 173.8f, 174.9f, 176.f, 177.1f, 178.2f, 179.3f, 180.4f, 181.5f, 182.6f, 183.7f, 184.8f, 185.9f,
+ 187.f, 188.1f, 189.2f, 190.3f, 191.4f, 192.5f, 193.6f, 194.7f, 195.8f, 196.9f, 198.f, 199.1f, 200.2f,
+ 201.3f, 202.4f, 203.5f, 204.6f,
205.7f, 206.8f, 207.9f, 209.f, 210.1f, 211.2f, 212.3f, 213.4f, 214.5f, + 215.6f, 216.7f, 217.8f, 218.9f, 220.f, 221.1f, 222.2f, 223.3f, 224.4f, 225.5f, 226.6f, 227.7f, 228.8f, + 229.9f, 231.f, 232.1f, 233.2f, 234.3f, 235.4f, 236.5f, 237.6f}); + + test_case.add_input({0.f, 0.f, 0.75f, 2.2f, 1.2f, 0.5f, 2.8f, 1.9f, 0.f, 3.f, 0.f, 3.f}); test_case.add_input({0, 2, 1}); test_case.add_expected_output( Shape{3, 2, 4, 4}, - {1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 2.3375, 2.3375, - 2.3375, 2.3375, 4.1525, 4.1525, 4.1525, 4.1525, 40.7, 40.7, 40.7, 40.7, - 40.7, 40.7, 40.7, 40.7, 41.9375, 41.9375, 41.9375, 41.9375, 43.7525, 43.7525, - 43.7525, 43.7525, 159.72, 159.94, 160.16, 160.38, 159.90562, 160.12563, 160.34563, 160.56563, - 160.9575, 161.1775, 161.3975, 161.61751, 162.1125, 162.3325, 162.55249, 162.77249, 199.32, 199.54001, - 199.76001, 199.97998, 199.50562, 199.72563, 199.94562, 200.16562, 200.5575, 200.7775, 200.9975, 201.2175, - 201.7125, 201.93251, 202.1525, 202.37251, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, - 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, - 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, - 126.5, 126.5, 126.5, 126.5, 126.5, 126.5}); + {1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 2.3375f, + 2.3375f, 2.3375f, 2.3375f, 4.1525f, 4.1525f, 4.1525f, 4.1525f, 40.7f, 40.7f, + 40.7f, 40.7f, 40.7f, 40.7f, 40.7f, 40.7f, 41.9375f, 41.9375f, 41.9375f, + 41.9375f, 43.7525f, 43.7525f, 43.7525f, 43.7525f, 159.72f, 159.94f, 160.16f, 160.38f, + 159.90562f, 160.12563f, 160.34563f, 160.56563f, 160.9575f, 161.1775f, 161.3975f, 161.61751f, 162.1125f, + 162.3325f, 162.55249f, 162.77249f, 199.32f, 199.54001f, 199.76001f, 199.97998f, 199.50562f, 199.72563f, + 199.94562f, 200.16562f, 200.5575f, 200.7775f, 200.9975f, 201.2175f, 201.7125f, 201.93251f, 202.1525f, + 202.37251f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, + 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 126.5f, + 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, + 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f}); test_case.run_with_tolerance_as_fp(0.01f); } @@ -3898,9 +3906,9 @@ NGRAPH_TEST(${BACKEND_NAME}, quant_dequant_pattern) { auto test_case = test::TestCase(function, s_device); // scale == 3.0 // zero point == 10 - test_case.add_input({9.0, 10.0, 15.0, 20.0, 30.0}); - test_case.add_input({1}); - test_case.add_expected_output(Shape{5}, {9.0, 9.0, 15.0, 21.0, 30.0}); + test_case.add_input({9.0f, 10.0f, 15.0f, 20.0f, 30.0f}); + test_case.add_input({1.f}); + test_case.add_expected_output(Shape{5}, {9.0f, 9.0f, 15.0f, 21.0f, 30.0f}); test_case.run(); } @@ -3912,9 +3920,9 @@ NGRAPH_TEST(${BACKEND_NAME}, quant_dequant_pattern_axis) { // axis = 1 // scale == {2.0, 3.0, 4.0} // zero point == {10, 20, 30} - test_case.add_input({1.0, 2.0, 3.0, 10.0, 20.0, 30.0, 40.0, 50.0, 100.0}); - test_case.add_expected_output(Shape{3, 3}, {0, 3, 4, 10, 21, 32, 40, 51, 100}); - test_case.add_input({1}); + test_case.add_input({1.0f, 2.0f, 3.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 100.0f}); + test_case.add_expected_output(Shape{3, 3}, {0.f, 3.f, 4.f, 10.f, 21.f, 32.f, 40.f, 51.f, 100.f}); + test_case.add_input({1.f}); test_case.run(); } @@ -3923,8 +3931,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax_0D) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softmax_0D.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({3.141592}); - test_case.add_expected_output({0.0}); + test_case.add_input({3.141592f}); + 
test_case.add_expected_output({0.0f}); test_case.run(); } @@ -3934,7 +3942,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0f, 0.0f, 1.0f}); - test_case.add_expected_output(Shape{3}, {-2.4076061, -1.407606, -0.407606}); + test_case.add_expected_output(Shape{3}, {-2.4076061f, -1.407606f, -0.407606f}); test_case.run(); } @@ -3944,7 +3952,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax13_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0f, 0.0f, 1.0f}); - test_case.add_expected_output(Shape{3}, {-2.4076061, -1.407606, -0.407606}); + test_case.add_expected_output(Shape{3}, {-2.4076061f, -1.407606f, -0.407606f}); test_case.run(); } @@ -3953,10 +3961,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax13_2D) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/logsoftmax13_2D.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 10000, 10001, 10002, 10003}); + test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 10000.f, 10001.f, 10002.f, 10003.f}); test_case.add_expected_output( Shape{2, 4}, - {-3.4401896, -2.4401896, -1.4401896, -0.44018966, -3.4401896, -2.4401896, -1.4401896, -0.44018966}); + {-3.4401896f, -2.4401896f, -1.4401896f, -0.44018966f, -3.4401896f, -2.4401896f, -1.4401896f, -0.44018966f}); test_case.run_with_tolerance_as_fp(); } @@ -4004,7 +4012,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4036,7 +4044,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {3.0f, 6.0f, 9.0f, 12.0f}); @@ -4061,7 +4069,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {3.0f, 8.0f, 15.0f, 12.0f, 20.0f, 30.0f}); @@ -4076,7 +4084,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_add_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4107,7 +4115,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_add_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {4.0f, 5.0f, 6.0f, 7.0f}); @@ -4133,7 +4141,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, @@ -4164,7 +4172,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v6_broadcast_no_axis) { 
Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {-2.0f, -1.0f, 0.0f, 1.0f}); @@ -4189,7 +4197,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {-2.0f, -2.0f, -2.0f, 1.0f, 1.0f, 1.0f}); @@ -4204,7 +4212,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4237,7 +4245,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({2.0f}); test_case.add_expected_output(shape, {0.5f, 1.0f, 1.5f, 2.0f}); @@ -4262,7 +4270,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {0.3333333f, 0.5f, 0.6f, 1.3333333f, 1.25f, 1.2f}); @@ -4299,7 +4307,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max) { "onnx/clip_no_min_no_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); @@ -4315,12 +4323,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max_inf) { auto test_case = test::TestCase(function, s_device); const std::vector data{std::numeric_limits::infinity(), -std::numeric_limits::infinity(), - static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::max()), std::numeric_limits::min(), std::numeric_limits::max(), std::numeric_limits::lowest(), - 0, - -1}; + 0.f, + -1.f}; const std::vector expected_output{std::numeric_limits::max(), std::numeric_limits::lowest(), @@ -4328,13 +4336,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max_inf) { std::numeric_limits::min(), std::numeric_limits::max(), std::numeric_limits::lowest(), - 0, - -1}; + 0.f, + -1.f}; test_case.add_input(data); test_case.add_expected_output(Shape{2, 4}, expected_output); - test_case.run_with_tolerance_as_fp(0); + test_case.run_with_tolerance_as_fp(0.f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_set_max) { @@ -4343,9 +4351,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_set_max) { "onnx/clip_no_min_set_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector max_val{2.01}; - const std::vector output{-1.6, -0.1, 2.01, 0., -10., 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector max_val{2.01f}; + const std::vector output{-1.6f, -0.1f, 2.01f, 0.f, -10.f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); test_case.add_input(max_val); @@ -4360,9 +4368,9 @@ 
NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_no_max) { "onnx/clip_set_min_no_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector min_val{-1.59}; - const std::vector output{-1.59, -0.1, 10., 0., -1.59, 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector min_val{-1.59f}; + const std::vector output{-1.59f, -0.1f, 10.f, 0.f, -1.59f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); test_case.add_input(min_val); @@ -4408,8 +4416,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_no_max_initializers) { "onnx/clip_set_min_no_max_initializers.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector output{-1.59, -0.1, 10., 0., -1.59, 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector output{-1.59f, -0.1f, 10.f, 0.f, -1.59f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); @@ -4423,10 +4431,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_set_max) { "onnx/clip_set_min_set_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector min_val{-1.59}; - const std::vector max_val{2.01}; - const std::vector output{-1.59, -0.1, 2.01, 0., -1.59, 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector min_val{-1.59f}; + const std::vector max_val{2.01f}; + const std::vector output{-1.59f, -0.1f, 2.01f, 0.f, -1.59f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); test_case.add_input(min_val); @@ -4442,8 +4450,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_set_max_initializers) { "onnx/clip_set_min_set_max_initializers.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector output{-1.59, -0.1, 2.01, 0., -1.59, 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector output{-1.59f, -0.1f, 2.01f, 0.f, -1.59f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); @@ -4456,16 +4464,16 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_mvn_v6) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/mvn_v6.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.8439683, 0.5665144, 0.05836735, 0.02916367, 0.12964272, 0.5060197, 0.79538304, - 0.9411346, 0.9546573, 0.17730942, 0.46192095, 0.26480448, 0.6746842, 0.01665257, - 0.62473077, 0.9240844, 0.9722341, 0.11965699, 0.41356155, 0.9129373, 0.59330076, - 0.81929934, 0.7862604, 0.11799799, 0.69248444, 0.54119414, 0.07513223}); + test_case.add_input({0.8439683f, 0.5665144f, 0.05836735f, 0.02916367f, 0.12964272f, 0.5060197f, 0.79538304f, + 0.9411346f, 0.9546573f, 0.17730942f, 0.46192095f, 0.26480448f, 0.6746842f, 0.01665257f, + 0.62473077f, 0.9240844f, 0.9722341f, 0.11965699f, 0.41356155f, 0.9129373f, 0.59330076f, + 0.81929934f, 0.7862604f, 0.11799799f, 0.69248444f, 0.54119414f, 0.07513223f}); test_case.add_expected_output( Shape{3, 3, 3, 1}, - {1.3546423, 0.33053496, -1.5450814, -1.2106764, -0.8925952, 0.29888135, 0.38083088, - 0.81808794, 0.85865635, -1.1060555, -0.05552877, -0.78310335, 0.83281356, -1.250282, - 0.67467856, 0.7669372, 0.9113869, -1.6463585, -0.23402764, 1.6092131, 
0.42940593, - 1.2906139, 1.1860244, -0.92945826, 0.0721334, -0.38174, -1.7799333}); + {1.3546423f, 0.33053496f, -1.5450814f, -1.2106764f, -0.8925952f, 0.29888135f, 0.38083088f, + 0.81808794f, 0.85865635f, -1.1060555f, -0.05552877f, -0.78310335f, 0.83281356f, -1.250282f, + 0.67467856f, 0.7669372f, 0.9113869f, -1.6463585f, -0.23402764f, 1.6092131f, 0.42940593f, + 1.2906139f, 1.1860244f, -0.92945826f, 0.0721334f, -0.38174f, -1.7799333f}); test_case.run(); } @@ -4578,17 +4586,17 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dropout12_not_const_training_mode) { NGRAPH_TEST(${BACKEND_NAME}, onnx_multiple_slices_last_layer) { std::vector data(1 * 30 * 320 * 320); - std::fill(data.begin(), data.end(), 1); + std::fill(data.begin(), data.end(), 1.f); const auto function = onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/multiple_slices_last_layer.onnx")); auto test_case = test::TestCase(function, s_device); std::vector o1(1 * 320 * 320 * 21); - std::fill(o1.begin(), o1.end(), 1); + std::fill(o1.begin(), o1.end(), 1.f); std::vector o2(1 * 320 * 320 * 9); - std::fill(o2.begin(), o2.end(), 1); + std::fill(o2.begin(), o2.end(), 1.f); test_case.add_input(data); test_case.add_expected_output(Shape{1, 320, 320, 21}, o1); @@ -4613,23 +4621,23 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_softmax_crossentropy_loss_mean) { "onnx/softmax_crossentropy_loss_mean.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.54881352186203, - 0.7151893377304077, - 0.6027633547782898, - 0.5448831915855408, - 0.42365479469299316, - 0.6458941102027893, - 0.4375872015953064, - 0.891772985458374, - 0.9636627435684204, - 0.3834415078163147, - 0.7917250394821167, - 0.5288949012756348, - 0.5680445432662964, - 0.9255966544151306, - 0.07103605568408966}); + test_case.add_input({0.54881352186203f, + 0.7151893377304077f, + 0.6027633547782898f, + 0.5448831915855408f, + 0.42365479469299316f, + 0.6458941102027893f, + 0.4375872015953064f, + 0.891772985458374f, + 0.9636627435684204f, + 0.3834415078163147f, + 0.7917250394821167f, + 0.5288949012756348f, + 0.5680445432662964f, + 0.9255966544151306f, + 0.07103605568408966f}); test_case.add_input({1, 4, 3}); - test_case.add_expected_output(Shape{}, {1.561384797096252441}); + test_case.add_expected_output(Shape{}, {1.561384797096252441f}); test_case.run(); } @@ -4640,15 +4648,15 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_negativelog_likelihood_loss) { auto test_case = test::TestCase(function, s_device); test_case.add_input({ - 0.54881352186203, 0.7151893377304077, 0.6027633547782898, 0.5448831915855408, 0.42365479469299316, - 0.6458941102027893, 0.4375872015953064, 0.891772985458374, 0.9636627435684204, 0.3834415078163147, - 0.7917250394821167, 0.5288949012756348, 0.5680445432662964, 0.9255966544151306, 0.07103605568408966, - 0.08712930232286453, 0.020218396559357643, 0.832619845867157, 0.7781567573547363, 0.8700121641159058, - 0.978618323802948, 0.7991585731506348, 0.4614793658256531, 0.7805292010307312, 0.11827442795038223, - 0.6399210095405579, 0.14335328340530396, 0.9446688890457153, 0.5218483209609985, 0.4146619439125061, + 0.54881352186203f, 0.7151893377304077f, 0.6027633547782898f, 0.5448831915855408f, 0.42365479469299316f, + 0.6458941102027893f, 0.4375872015953064f, 0.891772985458374f, 0.9636627435684204f, 0.3834415078163147f, + 0.7917250394821167f, 0.5288949012756348f, 0.5680445432662964f, 0.9255966544151306f, 0.07103605568408966f, + 0.08712930232286453f, 0.020218396559357643f, 0.832619845867157f, 
0.7781567573547363f, 0.8700121641159058f,
+ 0.978618323802948f, 0.7991585731506348f, 0.4614793658256531f, 0.7805292010307312f, 0.11827442795038223f,
+ 0.6399210095405579f, 0.14335328340530396f, 0.9446688890457153f, 0.5218483209609985f, 0.4146619439125061f,
 });
 test_case.add_input({3, 3, 2, 4, 2, 0});
- test_case.add_expected_output(Shape{}, {-0.531306922435760498});
+ test_case.add_expected_output(Shape{}, {-0.531306922435760498f});
 test_case.run();
 }

@@ -4958,19 +4966,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_einsum_sum) {
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/einsum_sum.onnx"));
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{3, 4},
- {1.764052345967664,
- 0.4001572083672233,
- 0.9787379841057392,
- 2.240893199201458,
- 1.8675579901499675,
- -0.977277879876411,
- 0.9500884175255894,
- -0.1513572082976979,
- -0.10321885179355784,
- 0.41059850193837233,
- 0.144043571160878,
- 1.454273506962975});
- test_case.add_expected_output(Shape{3}, {5.3838407376420845, 1.689011319501448, 1.9056967282686674});
+ {1.764052345967664f,
+ 0.4001572083672233f,
+ 0.9787379841057392f,
+ 2.240893199201458f,
+ 1.8675579901499675f,
+ -0.977277879876411f,
+ 0.9500884175255894f,
+ -0.1513572082976979f,
+ -0.10321885179355784f,
+ 0.41059850193837233f,
+ 0.144043571160878f,
+ 1.454273506962975f});
+ test_case.add_expected_output(Shape{3}, {5.3838407376420845f, 1.689011319501448f, 1.9056967282686674f});
 test_case.run();
 }

@@ -5074,7 +5082,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_uniform) {
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/random_uniform.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_expected_output(Shape{2, 2}, {43.45518, 48.67585, 42.227386, 40.86294});
+ test_case.add_expected_output(Shape{2, 2}, {43.45518f, 48.67585f, 42.227386f, 40.86294f});
 test_case.run();
 }

@@ -5085,7 +5093,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_uniform_like) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{2, 2}, {41, 42, 43, 44});

- test_case.add_expected_output(Shape{2, 2}, {43.45518, 48.67585, 42.227386, 40.86294});
+ test_case.add_expected_output(Shape{2, 2}, {43.45518f, 48.67585f, 42.227386f, 40.86294f});
 test_case.run();
 }

@@ -5094,7 +5102,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_normal) {
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/random_normal.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_expected_output(Shape{2, 2}, {13.459274, 41.75028, -19.311913, 131.79282});
+ test_case.add_expected_output(Shape{2, 2}, {13.459274f, 41.75028f, -19.311913f, 131.79282f});
 test_case.run();
 }

@@ -5105,50 +5113,50 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_normal_like) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{2, 2}, {0, 0, 0, 0});

- test_case.add_expected_output(Shape{2, 2}, {13.459274, 41.75028, -19.311913, 131.79282});
+ test_case.add_expected_output(Shape{2, 2}, {13.459274f, 41.75028f, -19.311913f, 131.79282f});
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_2in) {
 const auto function =
 onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_packed_2in.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f});
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_3in_offsets_none) {
 const auto function =
 onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_packed_3in_offset_none.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f});
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_4in_per_sample_weights) {
 const auto function = onnx_import::import_onnx_model(
 file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_packed_4in_per_sample_weights.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
- test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices
- test_case.add_input(Shape{3, 2}, {0.5, 0.5, 0.5, 0.5, 0.5, 0.5}); // per_sample_weights
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
+ test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4});            // indices
+ test_case.add_input(Shape{3, 2}, {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); // per_sample_weights

- test_case.add_expected_output(Shape{3, 2}, {-1.05, -1.2, -1., -1.1, -0.09999999, 0.4});
+ test_case.add_expected_output(Shape{3, 2}, {-1.05f, -1.2f, -1.f, -1.1f, -0.09999999f, 0.4f});
 test_case.run();
 }

@@ -5159,10 +5167,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_4in_two_none) {
 "onnx/aten_embedding_sum_packed_4in_two_none.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f});
 test_case.run();
 }

@@ -5173,11 +5181,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_offsets_sum_3in) {
 const auto function =
 onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_offset_3in.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices
 test_case.add_input(Shape{3}, {0, 2, 2});    // offsets

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, 0, 0, -0.2, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, 0.f, 0.f, -0.2f, 0.8f});
 test_case.run();
 }

@@ -5188,12 +5196,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_offsets_sum_4in) {
 "onnx/aten_embedding_sum_offset_4in.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
- test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices
- test_case.add_input(Shape{3}, {0, 2, 2}); // offsets
- test_case.add_input(Shape{4}, {0.5, 0.5, 0.5, 0.5}); // per_sample_weights
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
+ test_case.add_input(Shape{4}, {0, 2, 3, 4});          // indices
+ test_case.add_input(Shape{3}, {0, 2, 2});             // offsets
+ test_case.add_input(Shape{4}, {0.5f, 0.5f, 0.5f, 0.5f}); // per_sample_weights

- test_case.add_expected_output(Shape{3, 2}, {-1.05, -1.2, 0., 0., -0.09999999, 0.4});
+ test_case.add_expected_output(Shape{3, 2}, {-1.05f, -1.2f, 0.f, 0.f, -0.09999999f, 0.4f});
 test_case.run();
 }

@@ -5208,11 +5216,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_many_node_outputs) {
 EXPECT_EQ(function->get_results().size(), 1);

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices
 test_case.add_input(Shape{3}, {0, 2, 2});    // offsets

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, 0, 0, -0.2, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, 0.f, 0.f, -0.2f, 0.8f});
 test_case.run();
 }

@@ -5388,13 +5396,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_fib_like_input_rev) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{}, {0});
 test_case.add_input(Shape{}, {1});
- test_case.add_input(Shape{10}, std::vector{0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9});
+ test_case.add_input(Shape{10},
+ std::vector{0.f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f});

- test_case.add_expected_output(Shape{}, {0.14897026});
- test_case.add_expected_output(Shape{}, {0.});
+ test_case.add_expected_output(Shape{}, {0.14897026f});
+ test_case.add_expected_output(Shape{}, {0.f});
 test_case.add_expected_output(
 Shape{10},
- {0.9, 1.52, 1.694, 1.9284, 1.8112, 1.4958401, 0.9921121, 0.49759045, 0.14897026, 0.});
+ {0.9f, 1.52f, 1.694f, 1.9284f, 1.8112f, 1.4958401f, 0.9921121f, 0.49759045f, 0.14897026f, 0.f});
 test_case.run();
 }

@@ -5407,13 +5416,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_fib_like_input_out_rev) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{}, {0});
 test_case.add_input(Shape{}, {1});
- test_case.add_input(Shape{10}, std::vector{0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9});
+ test_case.add_input(Shape{10},
+ std::vector{0.f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f});

- test_case.add_expected_output(Shape{}, {0.14897026});
+ test_case.add_expected_output(Shape{}, {0.14897026f});
 test_case.add_expected_output(Shape{}, {0.});
 test_case.add_expected_output(
 Shape{10},
- {0., 0.14897026, 0.49759045, 0.9921121, 1.4958401, 1.8112, 1.9284, 1.694, 1.52,
0.9});
+ {0.f, 0.14897026f, 0.49759045f, 0.9921121f, 1.4958401f, 1.8112f, 1.9284f, 1.694f, 1.52f, 0.9f});
 test_case.run();
 }

@@ -5435,27 +5445,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_ones) {
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_vals) {
 const auto function = onnx_import::import_onnx_model(
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/scan15_ND_mixed.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0});
- test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1});
- std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5,
- 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.};
+ test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f});
+ test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f});
+ std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f,
+ 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f,
+ 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f};
 test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse)
 test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor

 test_case.add_expected_output(Shape{1, 3, 2},
- {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443});
+ {2.7327938f, 2.1428573f, 21.070545f, 16.92727f, 49.765778f, 41.444443f});
 test_case.add_expected_output(Shape{1, 3, 2},
- {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517});
+ {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f});
 test_case.add_expected_output(
 Shape{1, 3, 2, 5},
- {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5.,
- 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5.,
- 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.});
+ {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f,
+ 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f,
+ 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f,
+ 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f});
 test_case.run();
 }

@@ -5466,22 +5478,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_vals_neg_axes) {
 "onnx/scan15_ND_mixed_neg_axes.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0});
- test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1});
- std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5,
- 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.};
+ test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f});
+ test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f});
+ std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f,
+ 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f,
+ 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f};
 test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse)
 test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor

 test_case.add_expected_output(Shape{1, 3, 2},
- {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443});
16.92727f, 49.765778f, 41.444443f}); test_case.add_expected_output(Shape{1, 3, 2}, - {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517}); + {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f}); test_case.add_expected_output( Shape{1, 3, 2, 5}, - {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5., - 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5., - 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.}); + {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f, + 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f, + 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f, + 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f}); test_case.run(); } @@ -5490,22 +5504,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_dyn_rank_vals) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/scan15_dyn_rank.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0}); - test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); - std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, - 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.}; + test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); + test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, + 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f, + 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f}; test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output(Shape{1, 3, 2}, - {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443}); + {2.7327938f, 2.1428573f, 21.070545f, 16.92727f, 49.765778f, 41.444443f}); test_case.add_expected_output(Shape{1, 3, 2}, - {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517}); + {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f}); test_case.add_expected_output( Shape{1, 3, 2, 5}, - {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5., - 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5., - 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.}); + {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f, + 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f, + 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f, + 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f}); test_case.run(); } @@ -5529,43 +5545,46 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_b4_input_rev_vals) { "onnx/scan15_ND_b4_input_rev.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0)); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0.f)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1.f)); std::vector sequence_vals{ - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, - 1.9, 2., 2.1, 2.2, 2.3, 
2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3., 3.1, 3.2, 3.3, 3.4, 3.5, 3.6,
- 3.7, 3.8, 3.9, 4., 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5., 5.1, 5.2, 5.3, 5.4,
- 5.5, 5.6, 5.7, 5.8, 5.9, 6., 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7., 7.1, 7.2,
- 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8., 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9.,
- 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10., 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8,
- 10.9, 11., 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12.};
- test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // multiply factor (reverse)
+ 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f,
+ 1.6f, 1.7f, 1.8f, 1.9f, 2.f, 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f,
+ 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.f, 4.1f, 4.2f, 4.3f, 4.4f, 4.5f,
+ 4.6f, 4.7f, 4.8f, 4.9f, 5.f, 5.1f, 5.2f, 5.3f, 5.4f, 5.5f, 5.6f, 5.7f, 5.8f, 5.9f, 6.f,
+ 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 6.7f, 6.8f, 6.9f, 7.f, 7.1f, 7.2f, 7.3f, 7.4f, 7.5f,
+ 7.6f, 7.7f, 7.8f, 7.9f, 8.f, 8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f, 8.9f, 9.f,
+ 9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f, 10.f, 10.1f, 10.2f, 10.3f, 10.4f, 10.5f,
+ 10.6f, 10.7f, 10.8f, 10.9f, 11.f, 11.1f, 11.2f, 11.3f, 11.4f, 11.5f, 11.6f, 11.7f, 11.8f, 11.9f, 12.f};
+ test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // multiply factor (reverse)
 test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // div factor

 test_case.add_expected_output(
 Shape{4, 3, 2},
- {61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83,
- 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 5.9237804,
- 5.9097314, 5.896105, 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421});
+ {61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f,
+ 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f,
+ 5.9097314f, 5.896105f, 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f});
 test_case.add_expected_output(
 Shape{4, 3, 2},
- {6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, 6.9531364, 6.970527,
- 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, 7.324116, 7.3315806,
- 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502});
+ {6.271278f, 6.2461543f, 6.2433867f, 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f,
+ 6.987378f, 7.003712f, 7.019554f, 7.034921f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f,
+ 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, 7.49828f, 7.5024257f, 7.506502f});
 test_case.add_expected_output(
 Shape{5, 4, 3, 2},
- {25., 13., 9., 7., 5.8, 5., 1.7741936, 1.75, 1.7272727, 1.7058823,
- 1.6857144, 1.6666667, 1.3934426, 1.3870969, 1.3809522, 1.375, 1.3692307, 1.3636364, 1.2637362, 1.2608696,
- 1.2580644, 1.2553192, 1.2526315, 1.25, 70.57143, 35., 23.333334, 17.6, 14.218181, 12.,
- 3.6739323, 3.618421, 3.5664334, 3.5176468, 3.471777, 3.4285717, 2.822119, 2.8083491, 2.7950313, 2.7821426,
- 2.7696643, 2.757576, 2.543786, 2.5377107, 2.5317693, 2.5259573, 2.520271, 2.514706, 95.57143, 47.999996,
- 32.333336, 24.6, 20.01818, 17., 5.448126, 5.368421, 5.293706, 5.223529, 5.157491, 5.0952387,
- 4.215562, 4.195446, 4.1759834, 4.1571426, 4.138895, 4.1212125, 3.8075223, 3.7985802, 3.7898335, 3.7812767,
- 3.7729027, 3.764706, 61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83,
- 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 
5.9237804, 5.9097314, 5.896105, - 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421, 6.271278, 6.2461543, 6.2433867, 6.2545457, - 6.2744985, 6.3, 6.9531364, 6.970527, 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, - 7.324116, 7.3315806, 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {25.f, 13.f, 9.f, 7.f, 5.8f, 5.f, 1.7741936f, 1.75f, 1.7272727f, + 1.7058823f, 1.6857144f, 1.6666667f, 1.3934426f, 1.3870969f, 1.3809522f, 1.375f, 1.3692307f, 1.3636364f, + 1.2637362f, 1.2608696f, 1.2580644f, 1.2553192f, 1.2526315f, 1.25f, 70.57143f, 35.f, 23.333334f, + 17.6f, 14.218181f, 12.f, 3.6739323f, 3.618421f, 3.5664334f, 3.5176468f, 3.471777f, 3.4285717f, + 2.822119f, 2.8083491f, 2.7950313f, 2.7821426f, 2.7696643f, 2.757576f, 2.543786f, 2.5377107f, 2.5317693f, + 2.5259573f, 2.520271f, 2.514706f, 95.57143f, 47.999996f, 32.333336f, 24.6f, 20.01818f, 17.f, + 5.448126f, 5.368421f, 5.293706f, 5.223529f, 5.157491f, 5.0952387f, 4.215562f, 4.195446f, 4.1759834f, + 4.1571426f, 4.138895f, 4.1212125f, 3.8075223f, 3.7985802f, 3.7898335f, 3.7812767f, 3.7729027f, 3.764706f, + 61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f, 6.7754016f, + 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f, 5.9097314f, 5.896105f, + 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f, 6.271278f, 6.2461543f, 6.2433867f, + 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f, 6.987378f, 7.003712f, 7.019554f, 7.034921f, + 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, + 7.49828f, 7.5024257f, 7.506502f}); test_case.run(); } @@ -5600,43 +5619,46 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan8_ND_b4_input_rev_vals) { "onnx/scan8_ND_b4_input_rev.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0)); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0.f)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1.f)); std::vector sequence_vals{ - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, - 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3., 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, - 3.7, 3.8, 3.9, 4., 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5., 5.1, 5.2, 5.3, 5.4, - 5.5, 5.6, 5.7, 5.8, 5.9, 6., 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7., 7.1, 7.2, - 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8., 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9., - 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10., 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, - 10.9, 11., 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12.}; + 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, + 1.6f, 1.7f, 1.8f, 1.9f, 2.f, 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f, + 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.f, 4.1f, 4.2f, 4.3f, 4.4f, 4.5f, + 4.6f, 4.7f, 4.8f, 4.9f, 5.f, 5.1f, 5.2f, 5.3f, 5.4f, 5.5f, 5.6f, 5.7f, 5.8f, 5.9f, 6.f, + 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 6.7f, 6.8f, 6.9f, 7.f, 7.1f, 7.2f, 7.3f, 7.4f, 7.5f, + 7.6f, 7.7f, 7.8f, 7.9f, 8.f, 8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f, 8.9f, 9.f, + 9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f, 10.f, 10.1f, 10.2f, 10.3f, 10.4f, 10.5f, + 10.6f, 10.7f, 10.8f, 10.9f, 11.f, 11.1f, 11.2f, 11.3f, 11.4f, 11.5f, 11.6f, 11.7f, 11.8f, 11.9f, 12.f}; test_case.add_input(Shape{4, 5, 3, 2}, 
sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output( Shape{4, 3, 2}, - {61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83, - 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 5.9237804, - 5.9097314, 5.896105, 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421}); + {61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f, + 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f, + 5.9097314f, 5.896105f, 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f}); test_case.add_expected_output( Shape{4, 3, 2}, - {6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, 6.9531364, 6.970527, - 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, 7.324116, 7.3315806, - 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {6.271278f, 6.2461543f, 6.2433867f, 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f, + 6.987378f, 7.003712f, 7.019554f, 7.034921f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, + 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, 7.49828f, 7.5024257f, 7.506502f}); test_case.add_expected_output( Shape{4, 5, 3, 2}, - {25., 13., 9., 7., 5.8, 5., 70.57143, 35., 23.333334, 17.6, - 14.218181, 12., 95.57143, 47.999996, 32.333336, 24.6, 20.01818, 17., 61.210526, 33.2, - 23.857145, 19.181818, 16.373913, 14.5, 6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, - 1.7741936, 1.75, 1.7272727, 1.7058823, 1.6857144, 1.6666667, 3.6739323, 3.618421, 3.5664334, 3.5176468, - 3.471777, 3.4285717, 5.448126, 5.368421, 5.293706, 5.223529, 5.157491, 5.0952387, 6.8880844, 6.83, - 6.7754016, 6.7239814, 6.6754713, 6.6296296, 6.9531364, 6.970527, 6.987378, 7.003712, 7.019554, 7.034921, - 1.3934426, 1.3870969, 1.3809522, 1.375, 1.3692307, 1.3636364, 2.822119, 2.8083491, 2.7950313, 2.7821426, - 2.7696643, 2.757576, 4.215562, 4.195446, 4.1759834, 4.1571426, 4.138895, 4.1212125, 5.9686656, 5.953226, - 5.9382715, 5.9237804, 5.9097314, 5.896105, 7.30868, 7.3164845, 7.324116, 7.3315806, 7.338885, 7.346032, - 1.2637362, 1.2608696, 1.2580644, 1.2553192, 1.2526315, 1.25, 2.543786, 2.5377107, 2.5317693, 2.5259573, - 2.520271, 2.514706, 3.8075223, 3.7985802, 3.7898335, 3.7812767, 3.7729027, 3.764706, 5.652082, 5.645059, - 5.638186, 5.6314588, 5.624872, 5.618421, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {25.f, 13.f, 9.f, 7.f, 5.8f, 5.f, 70.57143f, 35.f, 23.333334f, + 17.6f, 14.218181f, 12.f, 95.57143f, 47.999996f, 32.333336f, 24.6f, 20.01818f, 17.f, + 61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.271278f, 6.2461543f, 6.2433867f, + 6.2545457f, 6.2744985f, 6.3f, 1.7741936f, 1.75f, 1.7272727f, 1.7058823f, 1.6857144f, 1.6666667f, + 3.6739323f, 3.618421f, 3.5664334f, 3.5176468f, 3.471777f, 3.4285717f, 5.448126f, 5.368421f, 5.293706f, + 5.223529f, 5.157491f, 5.0952387f, 6.8880844f, 6.83f, 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, + 6.9531364f, 6.970527f, 6.987378f, 7.003712f, 7.019554f, 7.034921f, 1.3934426f, 1.3870969f, 1.3809522f, + 1.375f, 1.3692307f, 1.3636364f, 2.822119f, 2.8083491f, 2.7950313f, 2.7821426f, 2.7696643f, 2.757576f, + 4.215562f, 4.195446f, 4.1759834f, 4.1571426f, 4.138895f, 4.1212125f, 5.9686656f, 5.953226f, 5.9382715f, + 5.9237804f, 5.9097314f, 5.896105f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, 7.338885f, 7.346032f, + 1.2637362f, 1.2608696f, 1.2580644f, 1.2553192f, 1.2526315f, 1.25f, 
2.543786f, 2.5377107f, 2.5317693f, + 2.5259573f, 2.520271f, 2.514706f, 3.8075223f, 3.7985802f, 3.7898335f, 3.7812767f, 3.7729027f, 3.764706f, + 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f, 7.485426f, 7.489783f, 7.494067f, + 7.49828f, 7.5024257f, 7.506502f}); test_case.run(); } @@ -5658,10 +5680,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softsign) { auto model = onnx_import::import_onnx_model( file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softsign.onnx")); - Inputs inputs{std::vector{1.0, 0.1, 20.0, 12.0, -12.0, -0.2, 0.5, 100.0, 0.0, -1.0}}; + Inputs inputs{std::vector{1.0f, 0.1f, 20.0f, 12.0f, -12.0f, -0.2f, 0.5f, 100.0f, 0.0f, -1.0f}}; - std::vector - output{0.5, 0.09090909, 0.95238096, 0.9230769, -0.9230769, -0.16666666, 0.33333334, 0.990099, 0., -0.5}; + std::vector output{0.5f, + 0.09090909f, + 0.95238096f, + 0.9230769f, + -0.9230769f, + -0.16666666f, + 0.33333334f, + 0.990099f, + 0.f, + -0.5f}; auto test_case = test::TestCase(model, s_device); test_case.add_multiple_inputs(inputs); @@ -6215,7 +6245,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float16_to_uint32) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5, 2.3, 3, 4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5f, 2.3f, 3.f, 4.f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{1, 2, 3, 4}); @@ -6229,7 +6259,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float16_to_int64) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5, 2.3, 3, 4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5f, 2.3f, 3.f, 4.f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{1, 2, 3, 4}); @@ -6287,7 +6317,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_int32_to_float) { test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-1, 2, 3, 4}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); - test_case.add_expected_output(std::vector{-1.0, 2.0, 3.0, 4.0}); + test_case.add_expected_output(std::vector{-1.0f, 2.0f, 3.0f, 4.0f}); test_case.run(); } @@ -6299,7 +6329,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float64_to_int32) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-107374.9876543, -2.2, 3.3, 4.4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-107374.9876543f, -2.2f, 3.3f, 4.4f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{-107374, -2, 3, 4}); @@ -6313,9 +6343,13 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_castlike_float32_to_bfloat16) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{3, 4}, std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); - test_case.add_input(Shape{3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - test_case.add_expected_output(std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); + test_case.add_input( + Shape{3, 4}, + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); + test_case.add_input(Shape{3, 4}, + {1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f, 8.5f, 9.5f, 10.5f, 11.5f, 12.5f}); + test_case.add_expected_output( + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); test_case.run(); } @@ -6327,9 +6361,12 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_castlike_bfloat16_to_float32) { auto test_case = test::TestCase(function, s_device); - 
test_case.add_input(Shape{3, 4}, std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); + test_case.add_input( + Shape{3, 4}, + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); test_case.add_input(Shape{3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - test_case.add_expected_output(std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.75, 9, 10, 11, 12}); + test_case.add_expected_output( + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.75f, 9.f, 10.f, 11.f, 12.f}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index 45e4876998acc5..e231334cfc87e8 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -33,19 +33,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_bias_gelu) { "onnx/com.microsoft/bias_gelu.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.5488135, - 0.71518934, - 0.60276335, - 0.5448832, - 0.4236548, - 0.6458941, - 0.4375872, - 0.891773, - 0.96366274, - 0.3834415}); - test_case.add_input({0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606}); - test_case.add_expected_output( - {1.2198428, 1.1112978, 1.0293297, 1.366493, 0.3411342, 1.329408, 0.8051748, 1.354462, 1.8336612, 0.3068893}); + test_case.add_input({0.5488135f, + 0.71518934f, + 0.60276335f, + 0.5448832f, + 0.4236548f, + 0.6458941f, + 0.4375872f, + 0.891773f, + 0.96366274f, + 0.3834415f}); + test_case.add_input({0.79172504f, 0.5288949f, 0.56804454f, 0.92559665f, 0.07103606f}); + test_case.add_expected_output({1.2198428f, + 1.1112978f, + 1.0293297f, + 1.366493f, + 0.3411342f, + 1.329408f, + 0.8051748f, + 1.354462f, + 1.8336612f, + 0.3068893f}); test_case.run(); } @@ -56,19 +64,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.19721794, -0.42944565, 0.18620640, 0.61282152, -0.11097327, -0.59518522, 0.13393641, 0.66901535, - 0.04256713, -0.71902490, 0.23107991, 0.17300847, -0.04390603, -0.31109563, 0.51021838, -0.66914201, - -0.20009395, -0.43313017, 0.67281967, 
-0.01712347, 0.09767530, -0.43024653, -0.01836969, -0.29238200, + -0.19721794f, -0.42944565f, 0.18620640f, 0.61282152f, -0.11097327f, -0.59518522f, 0.13393641f, 0.66901535f, + 0.04256713f, -0.71902490f, 0.23107991f, 0.17300847f, -0.04390603f, -0.31109563f, 0.51021838f, -0.66914201f, + -0.20009395f, -0.43313017f, 0.67281967f, -0.01712347f, 0.09767530f, -0.43024653f, -0.01836969f, -0.29238200f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -84,19 +92,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.17974678, -0.23946194, -0.04376268, 0.46959469, -0.11171167, -0.41859278, -0.11082965, 0.64513868, - 0.07773457, -0.51403606, -0.13661698, 0.11262375, -0.05096011, -0.10416907, 0.10070466, -0.50876135, - -0.22290939, -0.27663514, 0.55416691, -0.08064821, 0.04857478, -0.25121087, -0.15912610, -0.26637587, + -0.17974678f, -0.23946194f, -0.04376268f, 0.46959469f, -0.11171167f, -0.41859278f, -0.11082965f, 0.64513868f, + 0.07773457f, -0.51403606f, -0.13661698f, 0.11262375f, -0.05096011f, -0.10416907f, 0.10070466f, -0.50876135f, + -0.22290939f, -0.27663514f, 0.55416691f, -0.08064821f, 0.04857478f, -0.25121087f, -0.15912610f, -0.26637587f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -112,19 +120,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { "onnx/com.microsoft/skip_layer_normalization_with_gamma.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 
0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.10974677, 0.16053806, -0.26376268, 0.46959469, -0.04171166, -0.01859277, -0.33082965, 0.64513868, - 0.14773457, -0.11403608, -0.35661697, 0.11262375, 0.01903989, 0.29583094, -0.11929534, -0.50876135, - -0.15290938, 0.12336487, 0.33416691, -0.08064821, 0.11857478, 0.14878914, -0.37912610, -0.26637587, + -0.10974677f, 0.16053806f, -0.26376268f, 0.46959469f, -0.04171166f, -0.01859277f, -0.33082965f, 0.64513868f, + 0.14773457f, -0.11403608f, -0.35661697f, 0.11262375f, 0.01903989f, 0.29583094f, -0.11929534f, -0.50876135f, + -0.15290938f, 0.12336487f, 0.33416691f, -0.08064821f, 0.11857478f, 0.14878914f, -0.37912610f, -0.26637587f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -140,37 +148,37 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) "onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector gamma = { - 0.31542835, - 0.36371076, - 0.57019675, - 0.43860152, + 0.31542835f, + 0.36371076f, + 0.57019675f, + 0.43860152f, }; std::vector beta = { - 0.98837382, - 0.10204481, - 0.20887676, - 0.16130951, + 0.98837382f, + 0.10204481f, + 0.20887676f, + 0.16130951f, }; std::vector bias = { - 0.65310830, - 0.25329161, - 0.46631077, - 0.24442559, + 0.65310830f, + 0.25329161f, + 0.46631077f, + 0.24442559f, }; std::vector expected = { - 0.76600611, 0.34308332, -0.48470584, 0.71335256, 1.10028172, -0.13354334, -0.45232186, 0.79840088, - 1.52454257, -0.19450217, -0.13759643, 0.03988872, 1.27861762, 0.39529073, 0.12247884, -0.52944231, - 0.64228040, 0.21059875, 1.05966032, -0.14278713, 1.46366918, 0.21215858, -0.31640187, -0.22832340, + 0.76600611f, 0.34308332f, -0.48470584f, 
0.71335256, 1.10028172, -0.13354334, -0.45232186, 0.79840088,
- 1.52454257, -0.19450217, -0.13759643, 0.03988872, 1.27861762, 0.39529073, 0.12247884, -0.52944231,
- 0.64228040, 0.21059875, 1.05966032, -0.14278713, 1.46366918, 0.21215858, -0.31640187, -0.22832340,
+ 0.76600611f, 0.34308332f, -0.48470584f, 
0.71335256f, 1.10028172f, -0.13354334f, -0.45232186f, 0.79840088f, + 1.52454257f, -0.19450217f, -0.13759643f, 0.03988872f, 1.27861762f, 0.39529073f, 0.12247884f, -0.52944231f, + 0.64228040f, 0.21059875f, 1.05966032f, -0.14278713f, 1.46366918f, 0.21215858f, -0.31640187f, -0.22832340f, }; auto test_case = test::TestCase(function, s_device); @@ -193,21 +201,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, }; std::vector expected_output = { - -0.06615843, -0.18040463, 0.02199928, 0.01868065, 0.05397778, -0.11761580, -0.09138932, -0.02506775, - -0.02368510, -0.10373901, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.08906764, - 0.09692993, -0.04444099, -0.02037602, -0.03453060, -0.10214549, -0.13331436, -0.02665862, -0.01228805, - -0.14232540, -0.07032782, 0.05511986, -0.00120272, -0.04875736, -0.13051267, -0.05709254, 0.17854357, - -0.01759873, -0.01819968, 0.07573269, 0.00557164, 0.06232717, 0.00530490, -0.01565807, -0.14841977, - -0.02299280, 0.02038561, -0.00049481, 0.02575402, 0.10081697, -0.12517214, -0.09316762, -0.00974943, - -0.03093284, -0.06309240, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.06176658, - 0.08304203, -0.05025182, 0.00383657, -0.02288112, -0.11407227, -0.01386134, -0.04411830, -0.00537948, - 0.00164397, -0.03739140, 0.09941526, 0.00333974, -0.04251949, -0.12992151, -0.09509478, -0.11811313, - -0.03307065, -0.00866115, -0.15162414, 0.01106802, 0.06037656, 0.00035292, -0.00223284, -0.11215645, - -0.01390734, 0.07064321, 0.04028325, -0.00290875, 0.12875907, -0.12517214, -0.09316762, -0.00974943, - -0.03093284, -0.06309240, -0.08723789, 0.03130914, 0.03131931, -0.01526242, 0.20811458, -0.05696163, - 0.16304255, -0.02407495, -0.02955675, -0.03086288, -0.08130091, -0.05001551, -0.04875683, 0.00143666, - -0.12153473, -0.00018507, 0.10957482, -0.00416618, -0.01612359, -0.11605026, -0.08593204, 0.09055272, - -0.03054028, -0.03603891, -0.08479506, -0.00034568, 0.03713699, 0.00163411, -0.01738501, -0.18267182, + -0.06615843f, -0.18040463f, 0.02199928f, 0.01868065f, 0.05397778f, -0.11761580f, -0.09138932f, -0.02506775f, + -0.02368510f, -0.10373901f, -0.05551499f, -0.20972314f, 0.01365213f, 0.01132561f, -0.08603337f, -0.08906764f, + 0.09692993f, -0.04444099f, -0.02037602f, -0.03453060f, -0.10214549f, -0.13331436f, -0.02665862f, -0.01228805f, + -0.14232540f, -0.07032782f, 0.05511986f, -0.00120272f, -0.04875736f, -0.13051267f, -0.05709254f, 0.17854357f, + -0.01759873f, -0.01819968f, 0.07573269f, 0.00557164f, 0.06232717f, 0.00530490f, -0.01565807f, -0.14841977f, + -0.02299280f, 0.02038561f, -0.00049481f, 0.02575402f, 0.10081697f, -0.12517214f, -0.09316762f, -0.00974943f, + -0.03093284f, -0.06309240f, -0.05551499f, -0.20972314f, 0.01365213f, 0.01132561f, -0.08603337f, -0.06176658f, + 0.08304203f, -0.05025182f, 0.00383657f, -0.02288112f, -0.11407227f, -0.01386134f, -0.04411830f, -0.00537948f, + 0.00164397f, -0.03739140f, 0.09941526f, 0.00333974f, -0.04251949f, -0.12992151f, -0.09509478f, -0.11811313f, + -0.03307065f, -0.00866115f, -0.15162414f, 0.01106802f, 0.06037656f, 0.00035292f, -0.00223284f, -0.11215645f, + -0.01390734f, 0.07064321f, 0.04028325f, -0.00290875f, 0.12875907f, -0.12517214f, -0.09316762f, -0.00974943f, + -0.03093284f, -0.06309240f, -0.08723789f, 0.03130914f, 0.03131931f, -0.01526242f, 0.20811458f, -0.05696163f, + 0.16304255f, -0.02407495f, -0.02955675f, -0.03086288f, -0.08130091f, -0.05001551f, -0.04875683f, 0.00143666f, + 
-0.12153473f, -0.00018507f, 0.10957482f, -0.00416618f, -0.01612359f, -0.11605026f, -0.08593204f, 0.09055272f, + -0.03054028f, -0.03603891f, -0.08479506f, -0.00034568f, 0.03713699f, 0.00163411f, -0.01738501f, -0.18267182f, }; auto test_case = test::TestCase(function, s_device); @@ -229,21 +237,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector expected_output = { - -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, - -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, - 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, - -0.16770349, -0.07382569, 0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, - -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, - -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, - -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, - 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, -0.04948713, -0.01305631, - -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, - -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, - -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, - -0.01989413, -0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, - 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, - -0.07525793, -0.00207180, 0.03993115, -0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, - -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + -0.06044213f, -0.14845914f, 0.02457689f, 0.02091519f, 0.09514004f, -0.10280035f, -0.02087995f, -0.03323204f, + -0.02967127f, -0.13447416f, -0.05191760f, -0.16518904f, 0.02340531f, 0.02176395f, 0.04972410f, -0.07360736f, + 0.12192874f, -0.04081530f, -0.02338044f, -0.05671440f, -0.09475864f, -0.08944942f, -0.03362993f, -0.01683486f, + -0.16770349f, -0.07382569f, 0.06230322f, 0.02215859f, -0.05212611f, -0.03934773f, -0.04748865f, 0.18134241f, + -0.01965741f, -0.02202452f, 0.01973994f, 0.01575558f, 0.04300199f, 0.01436110f, -0.00198062f, -0.09065692f, + -0.02923042f, -0.00748686f, 0.00717049f, 0.02638642f, 0.12174864f, -0.12973398f, -0.11872391f, -0.00549398f, + -0.02386289f, -0.02210563f, -0.03590920f, -0.13728066f, -0.01337939f, 0.01538021f, -0.14687485f, -0.05033565f, + 0.03818212f, -0.04939338f, 0.00961064f, -0.07407621f, -0.09624685f, 0.05594898f, -0.04948713f, -0.01305631f, + -0.03779668f, -0.01469170f, 0.12346989f, 0.02082030f, -0.03449103f, -0.06029151f, -0.09300473f, -0.16308543f, + -0.02370042f, 0.01066893f, -0.06523034f, 0.00497636f, 0.01933458f, -0.00900802f, 0.00430878f, -0.13999483f, + -0.02377289f, 0.01760014f, 0.03896973f, 0.00831112f, 0.15634246f, -0.11109130f, -0.11997811f, -0.02304414f, + -0.01989413f, -0.12763791f, -0.05698400f, 0.17125534f, 0.00499324f, -0.02953288f, 0.09178342f, -0.05001877f, + 0.16157132f, -0.02312993f, -0.02932195f, -0.04914058f, -0.07994118f, -0.07199102f, -0.04517454f, 0.01249476f, + -0.07525793f, -0.00207180f, 0.03993115f, -0.01676321f, 
-0.00214832f, -0.16074482f, -0.05012497f, -0.00552153f, + -0.04302063f, -0.00549224f, -0.18399858f, -0.00767871f, -0.02209404f, -0.01383207f, -0.00082931f, -0.19533031f, }; std::vector expected_mask_index = { @@ -257,7 +265,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e test_case.add_input(segment_ids); test_case.add_expected_output(expected_output); test_case.add_expected_output(expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding_and_mask) { @@ -276,21 +284,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, }; std::vector expected_output = { - -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, - -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, - 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, - -0.16770349, -0.07382569, 0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, - -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, - -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, - -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, - 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, -0.04948713, -0.01305631, - -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, - -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, - -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, - -0.01989413, -0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, - 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, - -0.07525793, -0.00207180, 0.03993115, -0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, - -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + -0.06044213f, -0.14845914f, 0.02457689f, 0.02091519f, 0.09514004f, -0.10280035f, -0.02087995f, -0.03323204f, + -0.02967127f, -0.13447416f, -0.05191760f, -0.16518904f, 0.02340531f, 0.02176395f, 0.04972410f, -0.07360736f, + 0.12192874f, -0.04081530f, -0.02338044f, -0.05671440f, -0.09475864f, -0.08944942f, -0.03362993f, -0.01683486f, + -0.16770349f, -0.07382569f, 0.06230322f, 0.02215859f, -0.05212611f, -0.03934773f, -0.04748865f, 0.18134241f, + -0.01965741f, -0.02202452f, 0.01973994f, 0.01575558f, 0.04300199f, 0.01436110f, -0.00198062f, -0.09065692f, + -0.02923042f, -0.00748686f, 0.00717049f, 0.02638642f, 0.12174864f, -0.12973398f, -0.11872391f, -0.00549398f, + -0.02386289f, -0.02210563f, -0.03590920f, -0.13728066f, -0.01337939f, 0.01538021f, -0.14687485f, -0.05033565f, + 0.03818212f, -0.04939338f, 0.00961064f, -0.07407621f, -0.09624685f, 0.05594898f, -0.04948713f, -0.01305631f, + -0.03779668f, -0.01469170f, 0.12346989f, 0.02082030f, -0.03449103f, -0.06029151f, -0.09300473f, -0.16308543f, + -0.02370042f, 0.01066893f, -0.06523034f, 0.00497636f, 0.01933458f, -0.00900802f, 0.00430878f, -0.13999483f, + -0.02377289f, 0.01760014f, 0.03896973f, 
0.00831112f, 0.15634246f, -0.11109130f, -0.11997811f, -0.02304414f, + -0.01989413f, -0.12763791f, -0.05698400f, 0.17125534f, 0.00499324f, -0.02953288f, 0.09178342f, -0.05001877f, + 0.16157132f, -0.02312993f, -0.02932195f, -0.04914058f, -0.07994118f, -0.07199102f, -0.04517454f, 0.01249476f, + -0.07525793f, -0.00207180f, 0.03993115f, -0.01676321f, -0.00214832f, -0.16074482f, -0.05012497f, -0.00552153f, + -0.04302063f, -0.00549224f, -0.18399858f, -0.00767871f, -0.02209404f, -0.01383207f, -0.00082931f, -0.19533031f, }; std::vector expected_mask_index = { 5, @@ -304,7 +312,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e test_case.add_input(mask); test_case.add_expected_output(expected_output); test_case.add_expected_output(expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes) { @@ -320,70 +328,71 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.36980811, 0.14644176, 0.56961840, - 0.70373726, 0.28847644, 0.43328807, 0.75610667, 0.39609829, 0.89603841, 0.63892108, 0.89155442, - 0.68005556, 0.44919774, 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, - 0.31321833, 0.96541619, 0.58846509, 0.65966839, 0.53320622, 0.23053302, 0.39486930, 0.61880857, - 0.47486752, 0.47013220, 0.71607453, 0.28799102, 0.38346222, 0.74916983, 0.87845218, 0.10286336, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.36980811f, 0.14644176f, 0.56961840f, + 0.70373726f, 0.28847644f, 0.43328807f, 0.75610667f, 0.39609829f, 0.89603841f, 0.63892108f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.96541619f, 0.58846509f, 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, + 0.47486752f, 0.47013220f, 0.71607453f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 0.10286336f, }; std::vector segment_embeddings = { - 
0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, -0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, 
-0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -402,7 +411,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes test_case.add_input(Shape{3, 8}, mask); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_pos_embed_len) { @@ -418,70 +427,72 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_p 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.36980811, 0.14644176, 0.56961840, 0.70373726, - 0.28847644, 0.43328807, 0.75610667, 0.39609829, 0.89603841, 0.63892108, 0.89155442, 0.68005556, 0.44919774, - 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, 0.31321833, 0.96541619, 0.58846509, - 0.65966839, 0.53320622, 0.23053302, 0.39486930, 0.61880857, 0.47486752, 0.47013220, 0.71607453, 0.28799102, - 0.38346222, 0.74916983, 0.87845218, 0.10286336, 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.36980811f, 0.14644176f, 0.56961840f, + 0.70373726f, 0.28847644f, 0.43328807f, 0.75610667f, 0.39609829f, 0.89603841f, 0.63892108f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.96541619f, 0.58846509f, 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, + 0.47486752f, 0.47013220f, 0.71607453f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 
0.10286336f, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, }; std::vector segment_embeddings = { - 0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, 
-0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, -0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -500,7 +511,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_p test_case.add_input(Shape{3, 8}, mask); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ids) { @@ -516,50 +527,52 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.43328807, 0.75610667, 0.39609829, 0.89603841, - 0.63892108, 0.36980811, 0.14644176, 0.56961840, 0.70373726, 0.28847644, 0.89155442, 0.68005556, 0.44919774, - 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, 0.31321833, 0.28799102, 0.38346222, - 0.74916983, 0.87845218, 0.10286336, 0.96541619, 0.58846509, 0.65966839, 0.53320622, 0.23053302, 0.39486930, - 0.61880857, 0.47486752, 0.47013220, 0.71607453, 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.43328807f, 0.75610667f, 0.39609829f, + 0.89603841f, 0.63892108f, 0.36980811f, 0.14644176f, 0.56961840f, 0.70373726f, 0.28847644f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 0.10286336f, 0.96541619f, 
0.58846509f, + 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, 0.47486752f, 0.47013220f, 0.71607453f, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, }; std::vector segment_embeddings = { - 0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, @@ -568,21 +581,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ 0, 2, 1, 3, 4, 6, 7, 5, 8, 2, 1, 3, 4, 6, 7, 5, 0, 2, 1, 3, 4, 6, 7, 5, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 
0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, -0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, -0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -602,7 +615,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ test_case.add_input(Shape{3, 8}, position_ids); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { @@ -612,20 +625,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.91475844, 0.91523546, 0.82536930, 0.37491974, 0.22384071, 0.05941105, 0.01902100, 0.70131350, - 0.09603709, 0.44200060, 0.53106076, 0.79464376, 0.35469049, 0.25225943, 0.25179818, 0.29592562, - 0.24836586, 0.65088797, 0.93126643, 0.67980725, 0.85708112, 0.59808528, 0.46321425, 0.19301885, + 0.91475844f, 0.91523546f, 0.82536930f, 0.37491974f, 0.22384071f, 0.05941105f, 0.01902100f, 0.70131350f, + 0.09603709f, 0.44200060f, 0.53106076f, 0.79464376f, 0.35469049f, 0.25225943f, 0.25179818f, 0.29592562f, + 0.24836586f, 0.65088797f, 0.93126643f, 0.67980725f, 0.85708112f, 0.59808528f, 0.46321425f, 0.19301885f, }; std::vector output = { - 0.07966283, 0.10783536, -0.19424979, 0.54514766, 0.07965867, 0.10783093, -0.19424866, 0.54510003, - 0.07965846, 0.10783067, -0.19424550, 0.54509139, 0.07966217, 0.10783640, -0.19424903, 0.54512268, - 0.06940663, 0.10962760, -0.19698445, 0.53492010, 0.06940675, 0.10962828, -0.19698484, 0.53492326, - 0.06940714, 0.10963022, -0.19698712, 0.53494006, 0.06940673, 0.10962812, -0.19698519, 0.53492481, + 0.07966283f, 0.10783536f, -0.19424979f, 0.54514766f, 0.07965867f, 0.10783093f, -0.19424866f, 0.54510003f, + 0.07965846f, 0.10783067f, -0.19424550f, 0.54509139f, 0.07966217f, 0.10783640f, -0.19424903f, 0.54512268f, + 0.06940663f, 0.10962760f, -0.19698445f, 0.53492010f, 0.06940675f, 0.10962828f, -0.19698484f, 0.53492326f, + 0.06940714f, 0.10963022f, -0.19698712f, 0.53494006f, 0.06940673f, 0.10962812f, -0.19698519f, 0.53492481f, }; test_case.add_input(input); test_case.add_expected_output(output); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { @@ -636,24 +649,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.56477863, 0.60309958, 0.35158035, 0.03123519, 0.81918180, 0.76905495, 0.47219241, 0.72016627, - 0.59377003, 0.91380632, 0.56797302, 0.34846428, 0.83839595, 0.16394103, 0.34676281, 0.09161621, - 0.45562279, 
0.23317528, 0.37197968, 0.06727808, 0.08500192, 0.84915495, 0.68266946, 0.00227691, + 0.56477863f, 0.60309958f, 0.35158035f, 0.03123519f, 0.81918180f, 0.76905495f, 0.47219241f, 0.72016627f, + 0.59377003f, 0.91380632f, 0.56797302f, 0.34846428f, 0.83839595f, 0.16394103f, 0.34676281f, 0.09161621f, + 0.45562279f, 0.23317528f, 0.37197968f, 0.06727808f, 0.08500192f, 0.84915495f, 0.68266946f, 0.00227691f, }; std::vector output = { - -0.59370947, -0.30300471, 0.12048547, -0.09029539, 0.08041390, 0.10250041, -0.19381392, 0.55126983, - -0.59370828, -0.30301332, 0.12049319, -0.09029691, 0.08041921, 0.10250521, -0.19381438, 0.55127531, - -0.59370869, -0.30301058, 0.12049074, -0.09029643, 0.08041564, 0.10250199, -0.19381410, 0.55127168, - -0.59370929, -0.30300608, 0.12048667, -0.09029562, 0.08041184, 0.10249855, -0.19381374, 0.55126774, - -0.59681994, -0.26327702, 0.07638434, -0.06311120, 0.06671587, 0.10916986, -0.19412412, 0.51977092, - -0.59682053, -0.26328400, 0.07638102, -0.06311222, 0.06671817, 0.10917170, -0.19412397, 0.51977223, - -0.59682077, -0.26328647, 0.07637984, -0.06311259, 0.06671739, 0.10917108, -0.19412403, 0.51977175, - -0.59682101, -0.26328778, 0.07637922, -0.06311278, 0.06671065, 0.10916568, -0.19412443, 0.51976782, + -0.59370947f, -0.30300471f, 0.12048547f, -0.09029539f, 0.08041390f, 0.10250041f, -0.19381392f, 0.55126983f, + -0.59370828f, -0.30301332f, 0.12049319f, -0.09029691f, 0.08041921f, 0.10250521f, -0.19381438f, 0.55127531f, + -0.59370869f, -0.30301058f, 0.12049074f, -0.09029643f, 0.08041564f, 0.10250199f, -0.19381410f, 0.55127168f, + -0.59370929f, -0.30300608f, 0.12048667f, -0.09029562f, 0.08041184f, 0.10249855f, -0.19381374f, 0.55126774f, + -0.59681994f, -0.26327702f, 0.07638434f, -0.06311120f, 0.06671587f, 0.10916986f, -0.19412412f, 0.51977092f, + -0.59682053f, -0.26328400f, 0.07638102f, -0.06311222f, 0.06671817f, 0.10917170f, -0.19412397f, 0.51977223f, + -0.59682077f, -0.26328647f, 0.07637984f, -0.06311259f, 0.06671739f, 0.10917108f, -0.19412403f, 0.51977175f, + -0.59682101f, -0.26328778f, 0.07637922f, -0.06311278f, 0.06671065f, 0.10916568f, -0.19412443f, 0.51976782f, }; test_case.add_input(input); test_case.add_expected_output(output); - test_case.run_with_tolerance_as_fp(1e-4); + test_case.run_with_tolerance_as_fp(1e-4f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { @@ -664,34 +677,34 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.89578921, 0.42421508, 0.35630688, 0.77461642, 0.65753633, 0.09723099, 0.62597734, 0.72117692, - 0.57636845, 0.17104276, 0.13245547, 0.59879875, 0.15624641, 0.44903454, 0.50483286, 0.92975074, - 0.36934483, 0.29919949, 0.57185954, 0.83036488, 0.08384345, 0.20378476, 0.74684393, 0.46716982, + 0.89578921f, 0.42421508f, 0.35630688f, 0.77461642f, 0.65753633f, 0.09723099f, 0.62597734f, 0.72117692f, + 0.57636845f, 0.17104276f, 0.13245547f, 0.59879875f, 0.15624641f, 0.44903454f, 0.50483286f, 0.92975074f, + 0.36934483f, 0.29919949f, 0.57185954f, 0.83036488f, 0.08384345f, 0.20378476f, 0.74684393f, 0.46716982f, }; std::vector output = { - 0.05604819, 0.09000472, -0.19437021, 0.52487367, 0.06211422, 0.08740954, -0.19139624, 0.52762908, - 0.06708897, 0.08992603, -0.19214047, 0.53631783, 0.06896879, 0.10248676, -0.19485690, 0.53477794, - 0.08577005, 0.12807365, -0.19762954, 0.54432857, 0.06929274, 0.10893210, -0.19599904, 0.53184807, - 0.07348281, 0.10215081, -0.19280069, 0.53552240, 0.07861833, 0.10517240, -0.19285706, 
0.54126489,
+        0.05604819f, 0.09000472f, -0.19437021f, 0.52487367f, 0.06211422f, 0.08740954f, -0.19139624f, 0.52762908f,
+        0.06708897f, 0.08992603f, -0.19214047f, 0.53631783f, 0.06896879f, 0.10248676f, -0.19485690f, 0.53477794f,
+        0.08577005f, 0.12807365f, -0.19762954f, 0.54432857f, 0.06929274f, 0.10893210f, -0.19599904f, 0.53184807f,
+        0.07348281f, 0.10215081f, -0.19280069f, 0.53552240f, 0.07861833f, 0.10517240f, -0.19285706f, 0.54126489f,
     };
     std::vector<float> present = {
-        -0.60427380, -0.25958878, -0.59609234, -0.24055196, -0.59613681, -0.30088067, -0.59633607, -0.33270463,
-        0.06899665, -0.09284544, 0.08059876, -0.06146053, 0.11841078, -0.10019838, 0.10605468, -0.09273906,
-        -0.59036821, -0.32410735, -0.60532302, -0.25127757, -0.58926487, -0.25271094, -0.58640373, -0.31730092,
-        0.12509561, -0.07968873, 0.06005794, -0.08937149, 0.10523240, -0.05083811, 0.14162725, -0.07438751,
-        0.05604819, 0.09000472, 0.06819826, 0.08480665, 0.07700446, 0.09494394, 0.07459175, 0.14003153,
-        -0.19437021, 0.52487367, -0.18843602, 0.53037173, -0.19362189, 0.55360907, -0.20299932, 0.53020388,
-        0.08577005, 0.12807365, 0.05276009, 0.08972625, 0.08190014, 0.08852972, 0.09400313, 0.11423884,
-        -0.19762954, 0.54432857, -0.19435294, 0.51924801, -0.18643703, 0.54280555, -0.19302703, 0.55837619,
+        -0.60427380f, -0.25958878f, -0.59609234f, -0.24055196f, -0.59613681f, -0.30088067f, -0.59633607f, -0.33270463f,
+        0.06899665f, -0.09284544f, 0.08059876f, -0.06146053f, 0.11841078f, -0.10019838f, 0.10605468f, -0.09273906f,
+        -0.59036821f, -0.32410735f, -0.60532302f, -0.25127757f, -0.58926487f, -0.25271094f, -0.58640373f, -0.31730092f,
+        0.12509561f, -0.07968873f, 0.06005794f, -0.08937149f, 0.10523240f, -0.05083811f, 0.14162725f, -0.07438751f,
+        0.05604819f, 0.09000472f, 0.06819826f, 0.08480665f, 0.07700446f, 0.09494394f, 0.07459175f, 0.14003153f,
+        -0.19437021f, 0.52487367f, -0.18843602f, 0.53037173f, -0.19362189f, 0.55360907f, -0.20299932f, 0.53020388f,
+        0.08577005f, 0.12807365f, 0.05276009f, 0.08972625f, 0.08190014f, 0.08852972f, 0.09400313f, 0.11423884f,
+        -0.19762954f, 0.54432857f, -0.19435294f, 0.51924801f, -0.18643703f, 0.54280555f, -0.19302703f, 0.55837619f,
     };
 
     test_case.add_input(input);
     test_case.add_expected_output(output);
     test_case.add_expected_output(present);
-    test_case.run_with_tolerance_as_fp(1e-7);
+    test_case.run_with_tolerance_as_fp(1e-7f);
 }
 
 NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) {
     const auto function =
         onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
                                                             SERIALIZED_ZOO,
@@ -699,29 +712,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) {
     auto test_case = test::TestCase(function, s_device);
 
     std::vector<float> input = {
-        0.02841483, 0.47845092, 0.14633700, 0.54597300, 0.40160629, 0.55281311, 0.14931096, 0.64483738,
-        0.96559167, 0.05262021, 0.12391864, 0.20093553, 0.74290562, 0.19367455, 0.19253619, 0.41593507,
-        0.91188699, 0.61606920, 0.72673517, 0.86981291, 0.19963337, 0.22747350, 0.34308898, 0.57267183,
+        0.02841483f, 0.47845092f, 0.14633700f, 0.54597300f, 0.40160629f, 0.55281311f, 0.14931096f, 0.64483738f,
+        0.96559167f, 0.05262021f, 0.12391864f, 0.20093553f, 0.74290562f, 0.19367455f, 0.19253619f, 0.41593507f,
+        0.91188699f, 0.61606920f, 0.72673517f, 0.86981291f, 0.19963337f, 0.22747350f, 0.34308898f, 0.57267183f,
     };
     std::vector<int> mask_index = {
         0,
         1,
     };
     std::vector<float> output = {
-        0.08298690, 0.12711772, -0.19757506, 0.54029012, 0.08298548, 0.12711433,
-0.19757731, 0.54031140, - 0.08298430, 0.12711799, -0.19757695, 0.54031777, 0.08298548, 0.12711433, -0.19757444, 0.54028159, - 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, - 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, + 0.08298690f, 0.12711772f, -0.19757506f, 0.54029012f, 0.08298548f, 0.12711433f, -0.19757731f, 0.54031140f, + 0.08298430f, 0.12711799f, -0.19757695f, 0.54031777f, 0.08298548f, 0.12711433f, -0.19757444f, 0.54028159f, + 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, + 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, }; std::vector present = { - -0.58437425, -0.29483819, -0.59927911, -0.30336475, -0.59104657, -0.37327260, -0.59078789, -0.29863101, - 0.11751597, -0.04114649, 0.09933343, -0.09884726, 0.16250694, -0.12028439, 0.09319257, -0.05129660, - -0.60341775, -0.25221461, -0.58933026, -0.31912822, -0.59271193, -0.25470981, -0.59399152, -0.32643768, - 0.05398282, -0.07468132, 0.14743008, -0.09407346, 0.10399222, -0.06682440, 0.11632499, -0.08986320, - 0.09104910, 0.12973849, 0.06917210, 0.11059431, 0.09356256, 0.12594685, 0.07814129, 0.14221822, - -0.19329809, 0.53526556, -0.19787431, 0.53673857, -0.20045389, 0.57165766, -0.19869246, 0.51749766, - 0.05380550, 0.10459180, 0.09169570, 0.09892380, 0.07746917, 0.08042616, 0.07953370, 0.12909687, - -0.19593412, 0.50907606, -0.19202785, 0.56904894, -0.18689045, 0.54643762, -0.19969353, 0.53976399, + -0.58437425f, -0.29483819f, -0.59927911f, -0.30336475f, -0.59104657f, -0.37327260f, -0.59078789f, -0.29863101f, + 0.11751597f, -0.04114649f, 0.09933343f, -0.09884726f, 0.16250694f, -0.12028439f, 0.09319257f, -0.05129660f, + -0.60341775f, -0.25221461f, -0.58933026f, -0.31912822f, -0.59271193f, -0.25470981f, -0.59399152f, -0.32643768f, + 0.05398282f, -0.07468132f, 0.14743008f, -0.09407346f, 0.10399222f, -0.06682440f, 0.11632499f, -0.08986320f, + 0.09104910f, 0.12973849f, 0.06917210f, 0.11059431f, 0.09356256f, 0.12594685f, 0.07814129f, 0.14221822f, + -0.19329809f, 0.53526556f, -0.19787431f, 0.53673857f, -0.20045389f, 0.57165766f, -0.19869246f, 0.51749766f, + 0.05380550f, 0.10459180f, 0.09169570f, 0.09892380f, 0.07746917f, 0.08042616f, 0.07953370f, 0.12909687f, + -0.19593412f, 0.50907606f, -0.19202785f, 0.56904894f, -0.18689045f, 0.54643762f, -0.19969353f, 0.53976399f, }; test_case.add_input(input); @@ -739,9 +752,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.75259578, 0.81492645, 0.46713001, 0.29483622, 0.06768602, 0.95105755, 0.32065326, 0.52417183, - 0.73136383, 0.77176476, 0.60997742, 0.64625764, 0.16311000, 0.89680773, 0.01331447, 0.42468646, - 0.58711547, 0.00345124, 0.13053808, 0.46278623, 0.13786320, 0.65182054, 0.74864876, 0.81506181, + 0.75259578f, 0.81492645f, 0.46713001f, 0.29483622f, 0.06768602f, 0.95105755f, 0.32065326f, 0.52417183f, + 0.73136383f, 0.77176476f, 0.60997742f, 0.64625764f, 0.16311000f, 0.89680773f, 0.01331447f, 0.42468646f, + 0.58711547f, 0.00345124f, 0.13053808f, 0.46278623f, 0.13786320f, 0.65182054f, 0.74864876f, 0.81506181f, }; std::vector mask_index = { 3, @@ -750,20 +763,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { 1, }; std::vector output = { - 0.07524174, 0.11320241, -0.19909523, 0.54785377, 0.06825337, 0.13981669, -0.20774621, 0.53718704, - 0.07531278, 
0.12957911, -0.20330518, 0.54547405, 0.07531209, 0.12958010, -0.20330583, 0.54547292, - 0.08900890, 0.11150353, -0.18931937, 0.53757656, 0.07915881, 0.10416336, -0.18914750, 0.52921104, - 0.08285815, 0.11462159, -0.19115375, 0.53077918, 0.08285838, 0.11462225, -0.19115454, 0.53077984, + 0.07524174f, 0.11320241f, -0.19909523f, 0.54785377f, 0.06825337f, 0.13981669f, -0.20774621f, 0.53718704f, + 0.07531278f, 0.12957911f, -0.20330518f, 0.54547405f, 0.07531209f, 0.12958010f, -0.20330583f, 0.54547292f, + 0.08900890f, 0.11150353f, -0.18931937f, 0.53757656f, 0.07915881f, 0.10416336f, -0.18914750f, 0.52921104f, + 0.08285815f, 0.11462159f, -0.19115375f, 0.53077918f, 0.08285838f, 0.11462225f, -0.19115454f, 0.53077984f, }; std::vector present = { - -0.59630549, -0.28110915, -0.60274345, -0.36154836, -0.59437746, -0.33717164, -0.60134649, -0.29849592, - 0.11169122, -0.09345293, 0.11103803, -0.13096604, 0.13131849, -0.10597084, 0.10463209, -0.11332577, - -0.57949269, -0.27235535, -0.58941406, -0.25372508, -0.58658379, -0.28718373, -0.59821802, -0.32433146, - 0.13244939, -0.02865628, 0.09308393, -0.04083736, 0.10948701, -0.04423397, 0.13060363, -0.12316251, - 0.07509718, 0.08392500, 0.06825337, 0.13981669, 0.08239168, 0.11931328, 0.06770951, 0.09240761, - -0.19074154, 0.55260652, -0.20774621, 0.53718704, -0.19888818, 0.55371630, -0.19559640, 0.54754448, - 0.09983939, 0.10603377, 0.07915881, 0.10416336, 0.08655046, 0.12505992, 0.07738422, 0.09509270, - -0.18571433, 0.55095005, -0.18914750, 0.52921104, -0.19315663, 0.53234470, -0.19601485, 0.56322992, + -0.59630549f, -0.28110915f, -0.60274345f, -0.36154836f, -0.59437746f, -0.33717164f, -0.60134649f, -0.29849592f, + 0.11169122f, -0.09345293f, 0.11103803f, -0.13096604f, 0.13131849f, -0.10597084f, 0.10463209f, -0.11332577f, + -0.57949269f, -0.27235535f, -0.58941406f, -0.25372508f, -0.58658379f, -0.28718373f, -0.59821802f, -0.32433146f, + 0.13244939f, -0.02865628f, 0.09308393f, -0.04083736f, 0.10948701f, -0.04423397f, 0.13060363f, -0.12316251f, + 0.07509718f, 0.08392500f, 0.06825337f, 0.13981669f, 0.08239168f, 0.11931328f, 0.06770951f, 0.09240761f, + -0.19074154f, 0.55260652f, -0.20774621f, 0.53718704f, -0.19888818f, 0.55371630f, -0.19559640f, 0.54754448f, + 0.09983939f, 0.10603377f, 0.07915881f, 0.10416336f, 0.08655046f, 0.12505992f, 0.07738422f, 0.09509270f, + -0.18571433f, 0.55095005f, -0.18914750f, 0.52921104f, -0.19315663f, 0.53234470f, -0.19601485f, 0.56322992f, }; test_case.add_input(input); @@ -781,9 +794,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.33093750, 0.39181390, 0.14586255, 0.39709702, 0.98086524, 0.03891133, 0.72234219, 0.21966648, - 0.79986620, 0.97251678, 0.04131543, 0.43971965, 0.50185394, 0.11452501, 0.88111717, 0.76076663, - 0.31870860, 0.54107893, 0.91756296, 0.58112669, 0.99117357, 0.00256292, 0.58885485, 0.93481058, + 0.33093750f, 0.39181390f, 0.14586255f, 0.39709702f, 0.98086524f, 0.03891133f, 0.72234219f, 0.21966648f, + 0.79986620f, 0.97251678f, 0.04131543f, 0.43971965f, 0.50185394f, 0.11452501f, 0.88111717f, 0.76076663f, + 0.31870860f, 0.54107893f, 0.91756296f, 0.58112669f, 0.99117357f, 0.00256292f, 0.58885485f, 0.93481058f, }; std::vector mask = { 1, @@ -796,27 +809,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { 1, }; std::vector output = { - 0.07551830, 0.10666487, -0.19357042, 0.53683108, 0.07551410, 0.10666656, -0.19356072, 0.53684169, - 0.07552745, 0.10666100, -0.19358172, 0.53682435, 0.07552218, 
0.10666317, -0.19358677, 0.53681952, - 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, - 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, + 0.07551830f, 0.10666487f, -0.19357042f, 0.53683108f, 0.07551410f, 0.10666656f, -0.19356072f, 0.53684169f, + 0.07552745f, 0.10666100f, -0.19358172f, 0.53682435f, 0.07552218f, 0.10666317f, -0.19358677f, 0.53681952f, + 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, + 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, }; std::vector present = { - -0.59174627, -0.27471560, -0.58307797, -0.25967693, -0.60766846, -0.31754097, -0.61241394, -0.26291698, - 0.09206123, -0.05307099, 0.12491645, -0.03853742, 0.08732655, -0.13050151, 0.04073093, -0.10792807, - -0.60556883, -0.34055573, -0.60474855, -0.28785610, -0.60757709, -0.32514900, -0.58872569, -0.37967020, - 0.09779400, -0.13136166, 0.07915612, -0.10649752, 0.11043755, -0.15124020, 0.16626491, -0.11274654, - 0.07639833, 0.11762549, 0.09370039, 0.09133558, 0.05661478, 0.11096847, 0.04019671, 0.10117501, - -0.19371650, 0.52530587, -0.18429738, 0.55240726, -0.20283231, 0.53265429, -0.20036045, 0.50568837, - 0.06171235, 0.12687264, 0.05802051, 0.10266830, 0.06172965, 0.08967118, 0.09727416, 0.13513327, - -0.20576829, 0.53365225, -0.19832623, 0.52809310, -0.19971462, 0.55584043, -0.20121223, 0.57003713, + -0.59174627f, -0.27471560f, -0.58307797f, -0.25967693f, -0.60766846f, -0.31754097f, -0.61241394f, -0.26291698f, + 0.09206123f, -0.05307099f, 0.12491645f, -0.03853742f, 0.08732655f, -0.13050151f, 0.04073093f, -0.10792807f, + -0.60556883f, -0.34055573f, -0.60474855f, -0.28785610f, -0.60757709f, -0.32514900f, -0.58872569f, -0.37967020f, + 0.09779400f, -0.13136166f, 0.07915612f, -0.10649752f, 0.11043755f, -0.15124020f, 0.16626491f, -0.11274654f, + 0.07639833f, 0.11762549f, 0.09370039f, 0.09133558f, 0.05661478f, 0.11096847f, 0.04019671f, 0.10117501f, + -0.19371650f, 0.52530587f, -0.18429738f, 0.55240726f, -0.20283231f, 0.53265429f, -0.20036045f, 0.50568837f, + 0.06171235f, 0.12687264f, 0.05802051f, 0.10266830f, 0.06172965f, 0.08967118f, 0.09727416f, 0.13513327f, + -0.20576829f, 0.53365225f, -0.19832623f, 0.52809310f, -0.19971462f, 0.55584043f, -0.20121223f, 0.57003713f, }; test_case.add_input(input); test_case.add_input(mask); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { @@ -827,35 +840,35 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.23565151, 0.58627969, 0.75137484, 0.68586946, 0.62750375, 0.13284931, 0.13347220, 0.36357051, - 0.56910241, 0.48275986, 0.49440190, 0.45483324, 0.63547862, 0.97893149, 0.40630588, 0.38783622, - 0.07172249, 0.46385381, 0.99764502, 0.22219376, 0.67735291, 0.40799847, 0.74337566, 0.87263006, + 0.23565151f, 0.58627969f, 0.75137484f, 0.68586946f, 0.62750375f, 0.13284931f, 0.13347220f, 0.36357051f, + 0.56910241f, 0.48275986f, 0.49440190f, 0.45483324f, 0.63547862f, 0.97893149f, 0.40630588f, 0.38783622f, + 0.07172249f, 0.46385381f, 0.99764502f, 0.22219376f, 0.67735291f, 0.40799847f, 0.74337566f, 0.87263006f, }; std::vector mask = { 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 
1, 1, 1, 1, 0, 0, 1, 1, 1, }; std::vector output = { - 0.07771622, 0.10724538, -0.19453585, 0.54342043, 0.07459468, 0.10934003, -0.19561143, 0.53936625, - 0.07927690, 0.10619678, -0.19399606, 0.54543519, 0.07459468, 0.10934003, -0.19561143, 0.53936625, - 0.05485561, 0.11278091, -0.20117569, 0.52096349, 0.06629646, 0.10195158, -0.19900991, 0.54654449, - 0.06491723, 0.10292297, -0.19678673, 0.53451663, 0.06549793, 0.11126325, -0.19989857, 0.53717279, + 0.07771622f, 0.10724538f, -0.19453585f, 0.54342043f, 0.07459468f, 0.10934003f, -0.19561143f, 0.53936625f, + 0.07927690f, 0.10619678f, -0.19399606f, 0.54543519f, 0.07459468f, 0.10934003f, -0.19561143f, 0.53936625f, + 0.05485561f, 0.11278091f, -0.20117569f, 0.52096349f, 0.06629646f, 0.10195158f, -0.19900991f, 0.54654449f, + 0.06491723f, 0.10292297f, -0.19678673f, 0.53451663f, 0.06549793f, 0.11126325f, -0.19989857f, 0.53717279f, }; std::vector present = { - -0.59188855, -0.34495637, -0.59508181, -0.25013468, -0.59176934, -0.33229247, -0.59576762, -0.29731843, - 0.14217430, -0.10403840, 0.08584045, -0.06193545, 0.12358667, -0.08588549, 0.10515238, -0.08629489, - -0.59092808, -0.28260738, -0.60047609, -0.30411413, -0.61210287, -0.28645760, -0.59391296, -0.34649473, - 0.12789863, -0.08159252, 0.08122411, -0.08866425, 0.06395009, -0.12896645, 0.14855847, -0.11978809, - 0.08783118, 0.12152332, 0.07067389, 0.09078297, 0.08385989, 0.13306075, 0.07459468, 0.10934003, - -0.19849420, 0.55928540, -0.18948570, 0.53154731, -0.19960676, 0.54237455, -0.19561143, 0.53936625, - 0.08509844, 0.08314656, 0.06388859, 0.12990499, 0.04582624, 0.09566365, 0.08674107, 0.10823163, - -0.18808734, 0.56137776, -0.20168513, 0.51830697, -0.20066255, 0.52363914, -0.19737384, 0.56921995, + -0.59188855f, -0.34495637f, -0.59508181f, -0.25013468f, -0.59176934f, -0.33229247f, -0.59576762f, -0.29731843f, + 0.14217430f, -0.10403840f, 0.08584045f, -0.06193545f, 0.12358667f, -0.08588549f, 0.10515238f, -0.08629489f, + -0.59092808f, -0.28260738f, -0.60047609f, -0.30411413f, -0.61210287f, -0.28645760f, -0.59391296f, -0.34649473f, + 0.12789863f, -0.08159252f, 0.08122411f, -0.08866425f, 0.06395009f, -0.12896645f, 0.14855847f, -0.11978809f, + 0.08783118f, 0.12152332f, 0.07067389f, 0.09078297f, 0.08385989f, 0.13306075f, 0.07459468f, 0.10934003f, + -0.19849420f, 0.55928540f, -0.18948570f, 0.53154731f, -0.19960676f, 0.54237455f, -0.19561143f, 0.53936625f, + 0.08509844f, 0.08314656f, 0.06388859f, 0.12990499f, 0.04582624f, 0.09566365f, 0.08674107f, 0.10823163f, + -0.18808734f, 0.56137776f, -0.20168513f, 0.51830697f, -0.20066255f, 0.52363914f, -0.19737384f, 0.56921995f, }; test_case.add_input(input); test_case.add_input(mask); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { @@ -866,9 +879,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.82966000, 0.77751911, 0.08977074, 0.06076468, 0.40659550, 0.19995944, 0.55544919, 0.83971608, - 0.86254036, 0.30894691, 0.80156928, 0.83092463, 0.14506543, 0.32196075, 0.42209163, 0.24465553, - 0.93944097, 0.73528159, 0.23347616, 0.60544974, 0.93329269, 0.67604774, 0.56349903, 0.26199624, + 0.82966000f, 0.77751911f, 0.08977074f, 0.06076468f, 0.40659550f, 0.19995944f, 0.55544919f, 0.83971608f, + 0.86254036f, 0.30894691f, 0.80156928f, 0.83092463f, 0.14506543f, 0.32196075f, 0.42209163f, 0.24465553f, + 
0.93944097f, 0.73528159f, 0.23347616f, 0.60544974f, 0.93329269f, 0.67604774f, 0.56349903f, 0.26199624f, }; std::vector mask = { 1, @@ -891,41 +904,42 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { 1, }; std::vector past = { - 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, 0.73566031, - 0.75168055, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, 0.88056499, 0.08436447, - 0.54744655, 0.25466520, 0.08500137, 0.19271941, 0.86525357, 0.21717627, 0.97158766, 0.42288730, 0.09890039, - 0.01148765, 0.97024685, 0.19697112, 0.67671591, 0.67960924, 0.46656516, 0.30850092, 0.73536104, 0.73938161, - 0.91650903, 0.57628596, 0.51164514, 0.11695814, 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, - 0.90233624, 0.84986305, 0.26141909, 0.84528726, 0.81416380, 0.00429944, 0.31476986, 0.00440918, 0.77413058, - 0.13409913, 0.20965169, 0.61764991, 0.55266041, 0.56107825, 0.42051074, 0.16804738, 0.80362344, 0.52392679, - 0.27550557, 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.60799408, 0.01458820, - 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, + 0.92467678f, 0.79873562f, 0.00939191f, 0.34891853f, 0.35521412f, 0.21872006f, 0.89974332f, 0.74132687f, + 0.73566031f, 0.75168055f, 0.06773245f, 0.85702997f, 0.76256698f, 0.51739877f, 0.91567177f, 0.66617578f, + 0.88056499f, 0.08436447f, 0.54744655f, 0.25466520f, 0.08500137f, 0.19271941f, 0.86525357f, 0.21717627f, + 0.97158766f, 0.42288730f, 0.09890039f, 0.01148765f, 0.97024685f, 0.19697112f, 0.67671591f, 0.67960924f, + 0.46656516f, 0.30850092f, 0.73536104f, 0.73938161f, 0.91650903f, 0.57628596f, 0.51164514f, 0.11695814f, + 0.79792547f, 0.97192264f, 0.29246020f, 0.41030061f, 0.19014873f, 0.90233624f, 0.84986305f, 0.26141909f, + 0.84528726f, 0.81416380f, 0.00429944f, 0.31476986f, 0.00440918f, 0.77413058f, 0.13409913f, 0.20965169f, + 0.61764991f, 0.55266041f, 0.56107825f, 0.42051074f, 0.16804738f, 0.80362344f, 0.52392679f, 0.27550557f, + 0.66738850f, 0.39348483f, 0.31801429f, 0.30325863f, 0.37068403f, 0.92767614f, 0.60799408f, 0.01458820f, + 0.24194679f, 0.59596598f, 0.81762302f, 0.38094005f, 0.16618672f, 0.92488551f, 0.84298438f, 0.21752745f, }; std::vector output = { - 0.26186451, 0.45950246, -0.04001215, 0.47680017, 0.26333901, 0.46158865, -0.04006424, 0.47588652, - 0.26875457, 0.47031689, -0.03951600, 0.47674999, 0.26851410, 0.46987134, -0.03919901, 0.47629333, - 0.18083976, 0.16579385, -0.05161894, 0.63075018, 0.18228555, 0.16642828, -0.04873618, 0.63316816, - 0.18362364, 0.16702136, -0.05045432, 0.63178891, 0.18000112, 0.16541445, -0.05139139, 0.63105792, + 0.26186451f, 0.45950246f, -0.04001215f, 0.47680017f, 0.26333901f, 0.46158865f, -0.04006424f, 0.47588652f, + 0.26875457f, 0.47031689f, -0.03951600f, 0.47674999f, 0.26851410f, 0.46987134f, -0.03919901f, 0.47629333f, + 0.18083976f, 0.16579385f, -0.05161894f, 0.63075018f, 0.18228555f, 0.16642828f, -0.04873618f, 0.63316816f, + 0.18362364f, 0.16702136f, -0.05045432f, 0.63178891f, 0.18000112f, 0.16541445f, -0.05139139f, 0.63105792f, }; std::vector present = { - 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, - 0.73566031, 0.75168055, -0.59527576, -0.23625080, -0.58657664, -0.29827437, -0.59528387, -0.33578828, - -0.59068960, -0.34870598, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, - 0.88056499, 0.08436447, 0.54744655, 0.25466520, 0.08536442, -0.06134639, 0.11295843, 
-0.04818217, - 0.14562836, -0.12305059, 0.15695867, -0.11161390, 0.08500137, 0.19271941, 0.86525357, 0.21717627, - 0.97158766, 0.42288730, 0.09890039, 0.01148765, 0.97024685, 0.19697112, -0.59141791, -0.31600696, - -0.58647990, -0.34302223, -0.59306550, -0.36427227, -0.59695083, -0.26431620, 0.67671591, 0.67960924, - 0.46656516, 0.30850092, 0.73536104, 0.73938161, 0.91650903, 0.57628596, 0.51164514, 0.11695814, - 0.11255538, -0.07302766, 0.16620418, -0.09871224, 0.15272795, -0.12076923, 0.08827571, -0.07442430, - 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, 0.90233624, 0.84986305, 0.26141909, - 0.84528726, 0.81416380, 0.07014155, 0.07749540, 0.08745074, 0.13131952, 0.08430066, 0.09709007, - 0.09247591, 0.11065811, 0.00429944, 0.31476986, 0.00440918, 0.77413058, 0.13409913, 0.20965169, - 0.61764991, 0.55266041, 0.56107825, 0.42051074, -0.18658412, 0.53568852, -0.19482780, 0.53271860, - -0.19558203, 0.57155901, -0.19633618, 0.57260245, 0.16804738, 0.80362344, 0.52392679, 0.27550557, - 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.08172131, 0.13249113, - 0.09947956, 0.10781212, 0.08890627, 0.12280971, 0.06911418, 0.09499176, 0.60799408, 0.01458820, - 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, - -0.19839945, 0.53462923, -0.19349247, 0.57778782, -0.20039621, 0.56689924, -0.19190890, 0.53286803, + 0.92467678f, 0.79873562f, 0.00939191f, 0.34891853f, 0.35521412f, 0.21872006f, 0.89974332f, 0.74132687f, + 0.73566031f, 0.75168055f, -0.59527576f, -0.23625080f, -0.58657664f, -0.29827437f, -0.59528387f, -0.33578828f, + -0.59068960f, -0.34870598f, 0.06773245f, 0.85702997f, 0.76256698f, 0.51739877f, 0.91567177f, 0.66617578f, + 0.88056499f, 0.08436447f, 0.54744655f, 0.25466520f, 0.08536442f, -0.06134639f, 0.11295843f, -0.04818217f, + 0.14562836f, -0.12305059f, 0.15695867f, -0.11161390f, 0.08500137f, 0.19271941f, 0.86525357f, 0.21717627f, + 0.97158766f, 0.42288730f, 0.09890039f, 0.01148765f, 0.97024685f, 0.19697112f, -0.59141791f, -0.31600696f, + -0.58647990f, -0.34302223f, -0.59306550f, -0.36427227f, -0.59695083f, -0.26431620f, 0.67671591f, 0.67960924f, + 0.46656516f, 0.30850092f, 0.73536104f, 0.73938161f, 0.91650903f, 0.57628596f, 0.51164514f, 0.11695814f, + 0.11255538f, -0.07302766f, 0.16620418f, -0.09871224f, 0.15272795f, -0.12076923f, 0.08827571f, -0.07442430f, + 0.79792547f, 0.97192264f, 0.29246020f, 0.41030061f, 0.19014873f, 0.90233624f, 0.84986305f, 0.26141909f, + 0.84528726f, 0.81416380f, 0.07014155f, 0.07749540f, 0.08745074f, 0.13131952f, 0.08430066f, 0.09709007f, + 0.09247591f, 0.11065811f, 0.00429944f, 0.31476986f, 0.00440918f, 0.77413058f, 0.13409913f, 0.20965169f, + 0.61764991f, 0.55266041f, 0.56107825f, 0.42051074f, -0.18658412f, 0.53568852f, -0.19482780f, 0.53271860f, + -0.19558203f, 0.57155901f, -0.19633618f, 0.57260245f, 0.16804738f, 0.80362344f, 0.52392679f, 0.27550557f, + 0.66738850f, 0.39348483f, 0.31801429f, 0.30325863f, 0.37068403f, 0.92767614f, 0.08172131f, 0.13249113f, + 0.09947956f, 0.10781212f, 0.08890627f, 0.12280971f, 0.06911418f, 0.09499176f, 0.60799408f, 0.01458820f, + 0.24194679f, 0.59596598f, 0.81762302f, 0.38094005f, 0.16618672f, 0.92488551f, 0.84298438f, 0.21752745f, + -0.19839945f, 0.53462923f, -0.19349247f, 0.57778782f, -0.20039621f, 0.56689924f, -0.19190890f, 0.53286803f, }; test_case.add_input(input); @@ -933,7 +947,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { test_case.add_input(past); test_case.add_expected_output(output); 
test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { @@ -944,9 +958,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.14930259, 0.11199699, 0.81292826, 0.08368169, 0.05704883, 0.41276145, 0.38760167, 0.00146112, - 0.14275745, 0.54254925, 0.07962929, 0.31023681, 0.09597706, 0.60583973, 0.90233743, 0.33360451, - 0.18193199, 0.19159532, 0.07869831, 0.86026299, 0.20683478, 0.40150928, 0.93124926, 0.31805834, + 0.14930259f, 0.11199699f, 0.81292826f, 0.08368169f, 0.05704883f, 0.41276145f, 0.38760167f, 0.00146112f, + 0.14275745f, 0.54254925f, 0.07962929f, 0.31023681f, 0.09597706f, 0.60583973f, 0.90233743f, 0.33360451f, + 0.18193199f, 0.19159532f, 0.07869831f, 0.86026299f, 0.20683478f, 0.40150928f, 0.93124926f, 0.31805834f, }; std::vector mask = { 0, @@ -959,30 +973,30 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { 0, }; std::vector extra_add = { - 0.73230380, 0.61824518, 0.19738488, 0.57034588, 0.22331032, 0.53262889, 0.60098642, 0.72943515, - 0.09009175, 0.81116527, 0.47240964, 0.49679127, 0.41110733, 0.29418564, 0.93818313, 0.64175284, - 0.06807775, 0.66733366, 0.78848422, 0.48788327, 0.38806340, 0.14002480, 0.72263688, 0.22772972, - 0.24000823, 0.75820386, 0.64254439, 0.19385594, 0.95595860, 0.59840417, 0.93769604, 0.62474734, - 0.36690548, 0.76047903, 0.62352085, 0.58574778, 0.64251810, 0.78072041, 0.43344691, 0.75383639, - 0.73950553, 0.92625278, 0.05066428, 0.08448382, 0.25980917, 0.50312829, 0.97800279, 0.05422170, - 0.05171391, 0.82828254, 0.42234898, 0.95752198, 0.96325767, 0.97909677, 0.35578200, 0.48091716, - 0.03637243, 0.91552693, 0.43403026, 0.94275808, 0.51182085, 0.86773109, 0.38459453, 0.87822068, + 0.73230380f, 0.61824518f, 0.19738488f, 0.57034588f, 0.22331032f, 0.53262889f, 0.60098642f, 0.72943515f, + 0.09009175f, 0.81116527f, 0.47240964f, 0.49679127f, 0.41110733f, 0.29418564f, 0.93818313f, 0.64175284f, + 0.06807775f, 0.66733366f, 0.78848422f, 0.48788327f, 0.38806340f, 0.14002480f, 0.72263688f, 0.22772972f, + 0.24000823f, 0.75820386f, 0.64254439f, 0.19385594f, 0.95595860f, 0.59840417f, 0.93769604f, 0.62474734f, + 0.36690548f, 0.76047903f, 0.62352085f, 0.58574778f, 0.64251810f, 0.78072041f, 0.43344691f, 0.75383639f, + 0.73950553f, 0.92625278f, 0.05066428f, 0.08448382f, 0.25980917f, 0.50312829f, 0.97800279f, 0.05422170f, + 0.05171391f, 0.82828254f, 0.42234898f, 0.95752198f, 0.96325767f, 0.97909677f, 0.35578200f, 0.48091716f, + 0.03637243f, 0.91552693f, 0.43403026f, 0.94275808f, 0.51182085f, 0.86773109f, 0.38459453f, 0.87822068f, }; std::vector output = { - 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, - 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, - 0.08714182, 0.12259886, -0.19516067, 0.54010558, 0.08671370, 0.12369543, -0.19658084, 0.54502594, - 0.08458151, 0.12488046, -0.19519810, 0.53906947, 0.09063499, 0.12088943, -0.19583938, 0.54266596, + 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, + 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, + 0.08714182f, 0.12259886f, -0.19516067f, 0.54010558f, 0.08671370f, 0.12369543f, -0.19658084f, 0.54502594f, + 0.08458151f, 0.12488046f, -0.19519810f, 0.53906947f, 
0.09063499f, 0.12088943f, -0.19583938f, 0.54266596f, }; std::vector present = { - -0.59800303, -0.35666457, -0.59420627, -0.31881350, -0.59887993, -0.27025288, -0.60216135, -0.27772796, - 0.11659990, -0.11224300, 0.09693416, -0.07304113, 0.06023501, -0.05941332, 0.06434284, -0.07978789, - -0.59005713, -0.37009716, -0.59542215, -0.27914333, -0.57998544, -0.29826957, -0.58625919, -0.28872511, - 0.15994480, -0.11288825, 0.07906821, -0.05991337, 0.14479136, -0.04415035, 0.13493451, -0.06541853, - 0.07513385, 0.14411135, 0.07505661, 0.14532046, 0.06090815, 0.12919067, 0.05788904, 0.12018456, - -0.20586906, 0.53715372, -0.20203318, 0.52092510, -0.19883196, 0.50295448, -0.19937295, 0.51055026, - 0.09417956, 0.12943678, 0.06923291, 0.12574309, 0.10221909, 0.11366953, 0.09235901, 0.09584601, - -0.20036517, 0.56818324, -0.19709785, 0.51547027, -0.18871340, 0.55736589, -0.18826833, 0.55965197, + -0.59800303f, -0.35666457f, -0.59420627f, -0.31881350f, -0.59887993f, -0.27025288f, -0.60216135f, -0.27772796f, + 0.11659990f, -0.11224300f, 0.09693416f, -0.07304113f, 0.06023501f, -0.05941332f, 0.06434284f, -0.07978789f, + -0.59005713f, -0.37009716f, -0.59542215f, -0.27914333f, -0.57998544f, -0.29826957f, -0.58625919f, -0.28872511f, + 0.15994480f, -0.11288825f, 0.07906821f, -0.05991337f, 0.14479136f, -0.04415035f, 0.13493451f, -0.06541853f, + 0.07513385f, 0.14411135f, 0.07505661f, 0.14532046f, 0.06090815f, 0.12919067f, 0.05788904f, 0.12018456f, + -0.20586906f, 0.53715372f, -0.20203318f, 0.52092510f, -0.19883196f, 0.50295448f, -0.19937295f, 0.51055026f, + 0.09417956f, 0.12943678f, 0.06923291f, 0.12574309f, 0.10221909f, 0.11366953f, 0.09235901f, 0.09584601f, + -0.20036517f, 0.56818324f, -0.19709785f, 0.51547027f, -0.18871340f, 0.55736589f, -0.18826833f, 0.55965197f, }; test_case.add_input(input); @@ -990,7 +1004,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { test_case.add_input(extra_add); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { @@ -1001,29 +1015,30 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.42226878, 0.50984067, 0.80440795, 0.68040705, 0.93614250, 0.45104721, 0.71767306, 0.48596525, - 0.70076728, 0.04500086, 0.28930107, 0.77435863, 0.19392140, 0.90290719, 0.91955870, 0.58811885, - 0.76795286, 0.62884814, 0.23377730, 0.49212688, 0.87256873, 0.11944817, 0.57715887, 0.91886938, + 0.42226878f, 0.50984067f, 0.80440795f, 0.68040705f, 0.93614250f, 0.45104721f, 0.71767306f, 0.48596525f, + 0.70076728f, 0.04500086f, 0.28930107f, 0.77435863f, 0.19392140f, 0.90290719f, 0.91955870f, 0.58811885f, + 0.76795286f, 0.62884814f, 0.23377730f, 0.49212688f, 0.87256873f, 0.11944817f, 0.57715887f, 0.91886938f, }; std::vector weights = { - 0.99377930, 0.22733542, 0.43217131, 0.60717988, 0.97224706, 0.70020503, 0.92439449, 0.41512674, 0.47728160, - 0.40306625, 0.72619593, 0.37954643, 0.36950976, 0.84305370, 0.61671126, 0.22251014, 0.73839295, 0.73471880, - 0.37428924, 0.80240524, 0.23120961, 0.06072779, 0.92840081, 0.71558088, 0.08719950, 0.51666921, 0.53768843, - 0.48113129, 0.46389169, 0.01036468, 0.37341005, 0.67195475, 0.53599644, 0.41795707, 0.58081782, 0.97939289, + 0.99377930f, 0.22733542f, 0.43217131f, 0.60717988f, 0.97224706f, 0.70020503f, 0.92439449f, 0.41512674f, + 0.47728160f, 0.40306625f, 0.72619593f, 
0.37954643f, 0.36950976f, 0.84305370f, 0.61671126f, 0.22251014f, + 0.73839295f, 0.73471880f, 0.37428924f, 0.80240524f, 0.23120961f, 0.06072779f, 0.92840081f, 0.71558088f, + 0.08719950f, 0.51666921f, 0.53768843f, 0.48113129f, 0.46389169f, 0.01036468f, 0.37341005f, 0.67195475f, + 0.53599644f, 0.41795707f, 0.58081782f, 0.97939289f, }; std::vector bias = { - 0.77122736, - 0.75600564, - 0.86177206, - 0.69982684, - 0.74719858, - 0.78054035, - 0.80007398, - 0.74902135, - 0.81258053, - 0.01575289, - 0.08463049, - 0.39671996, + 0.77122736f, + 0.75600564f, + 0.86177206f, + 0.69982684f, + 0.74719858f, + 0.78054035f, + 0.80007398f, + 0.74902135f, + 0.81258053f, + 0.01575289f, + 0.08463049f, + 0.39671996f, }; std::vector mask = { 0, @@ -1046,39 +1061,42 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { 0, }; std::vector past = { - 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, - 0.91846281, 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, - 0.89891797, 0.27753425, 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, - 0.62769043, 0.61990744, 0.59077013, 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, - 0.80831683, 0.29390740, 0.29051417, 0.51964313, 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, - 0.98571628, 0.07566493, 0.37537411, 0.42080343, 0.21715857, 0.57869565, 0.55962265, 0.82500041, 0.60776925, - 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, 0.98562658, 0.64355153, 0.69856495, 0.30377558, - 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, 0.94460547, 0.90907097, 0.22525074, - 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, 0.22606593, + 0.27759778f, 0.18458818f, 0.63114458f, 0.09953160f, 0.59739488f, 0.63917851f, 0.18828323f, 0.65625650f, + 0.84574437f, 0.91846281f, 0.55102497f, 0.27506110f, 0.06816208f, 0.82616585f, 0.85912132f, 0.88682729f, + 0.14730524f, 0.61618829f, 0.89891797f, 0.27753425f, 0.57438278f, 0.33753166f, 0.88768929f, 0.35533753f, + 0.30193496f, 0.81678063f, 0.26569194f, 0.62769043f, 0.61990744f, 0.59077013f, 0.11058200f, 0.97370809f, + 0.81339806f, 0.57207322f, 0.80417949f, 0.54185718f, 0.80831683f, 0.29390740f, 0.29051417f, 0.51964313f, + 0.04341308f, 0.05925354f, 0.82397246f, 0.55753845f, 0.61247689f, 0.98571628f, 0.07566493f, 0.37537411f, + 0.42080343f, 0.21715857f, 0.57869565f, 0.55962265f, 0.82500041f, 0.60776925f, 0.19367239f, 0.88382334f, + 0.20328504f, 0.58192456f, 0.94542676f, 0.98562658f, 0.64355153f, 0.69856495f, 0.30377558f, 0.02857198f, + 0.96969068f, 0.48450547f, 0.98341352f, 0.03546083f, 0.84963584f, 0.94460547f, 0.90907097f, 0.22525074f, + 0.12530145f, 0.52223104f, 0.09549426f, 0.93127102f, 0.93429947f, 0.01428344f, 0.74249738f, 0.22606593f, }; std::vector output = { - 1.47439122, 0.50951630, 1.17974961, 1.58501005, 1.49403512, 0.51560062, 1.18972027, 1.59668207, - 1.48384988, 0.51248586, 1.18596375, 1.59219086, 1.44181466, 0.50219649, 1.15537691, 1.55348074, - 0.83429223, 0.59521818, 0.87688094, 0.13611843, 0.82936716, 0.61004817, 0.87633312, 0.13887596, - 0.83155584, 0.59382534, 0.87496555, 0.14041223, 0.83309680, 0.58982348, 0.87517864, 0.13930768, + 1.47439122f, 0.50951630f, 1.17974961f, 1.58501005f, 1.49403512f, 0.51560062f, 1.18972027f, 1.59668207f, + 1.48384988f, 0.51248586f, 1.18596375f, 1.59219086f, 1.44181466f, 0.50219649f, 1.15537691f, 1.55348074f, + 0.83429223f, 0.59521818f, 
0.87688094f, 0.13611843f, 0.82936716f, 0.61004817f, 0.87633312f, 0.13887596f, + 0.83155584f, 0.59382534f, 0.87496555f, 0.14041223f, 0.83309680f, 0.58982348f, 0.87517864f, 0.13930768f, }; std::vector present = { - 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, - 0.91846281, 1.90736914, 1.45914197, 2.30920029, 1.94944119, 2.12886763, 1.64736962, 1.36378694, 1.03263116, - 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, 0.89891797, - 0.27753425, 1.68161881, 1.87394094, 1.94785213, 2.08572555, 1.90705216, 1.90777159, 1.23910809, 1.52017307, - 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, 0.62769043, 0.61990744, - 0.59077013, 2.02901411, 1.58923888, 2.17776394, 1.76309133, 1.74264824, 1.31485105, 1.71575761, 1.29775190, - 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, 0.80831683, 0.29390740, 0.29051417, - 0.51964313, 1.66065478, 2.17192268, 1.86598253, 2.03193212, 1.52620018, 1.82728052, 1.46963060, 1.87916136, - 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, 0.98571628, 0.07566493, 0.37537411, 0.42080343, - 0.21715857, 1.56316149, 0.55312467, 1.59553123, 0.53537023, 1.64308119, 0.62742490, 1.31600118, 0.37510848, - 0.57869565, 0.55962265, 0.82500041, 0.60776925, 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, - 0.98562658, 1.33183134, 1.70965421, 1.70983100, 1.76660407, 1.46399045, 1.70318413, 0.83565855, 1.37921953, - 0.64355153, 0.69856495, 0.30377558, 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, - 0.94460547, 1.60677671, 0.53308368, 1.60789728, 0.56227136, 1.50563633, 0.50456268, 1.49554634, 0.48299593, - 0.90907097, 0.22525074, 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, - 0.22606593, 1.59781134, 2.01703453, 1.58993423, 1.78536010, 1.21809304, 1.69219351, 1.24090374, 1.75499403, + 0.27759778f, 0.18458818f, 0.63114458f, 0.09953160f, 0.59739488f, 0.63917851f, 0.18828323f, 0.65625650f, + 0.84574437f, 0.91846281f, 1.90736914f, 1.45914197f, 2.30920029f, 1.94944119f, 2.12886763f, 1.64736962f, + 1.36378694f, 1.03263116f, 0.55102497f, 0.27506110f, 0.06816208f, 0.82616585f, 0.85912132f, 0.88682729f, + 0.14730524f, 0.61618829f, 0.89891797f, 0.27753425f, 1.68161881f, 1.87394094f, 1.94785213f, 2.08572555f, + 1.90705216f, 1.90777159f, 1.23910809f, 1.52017307f, 0.57438278f, 0.33753166f, 0.88768929f, 0.35533753f, + 0.30193496f, 0.81678063f, 0.26569194f, 0.62769043f, 0.61990744f, 0.59077013f, 2.02901411f, 1.58923888f, + 2.17776394f, 1.76309133f, 1.74264824f, 1.31485105f, 1.71575761f, 1.29775190f, 0.11058200f, 0.97370809f, + 0.81339806f, 0.57207322f, 0.80417949f, 0.54185718f, 0.80831683f, 0.29390740f, 0.29051417f, 0.51964313f, + 1.66065478f, 2.17192268f, 1.86598253f, 2.03193212f, 1.52620018f, 1.82728052f, 1.46963060f, 1.87916136f, + 0.04341308f, 0.05925354f, 0.82397246f, 0.55753845f, 0.61247689f, 0.98571628f, 0.07566493f, 0.37537411f, + 0.42080343f, 0.21715857f, 1.56316149f, 0.55312467f, 1.59553123f, 0.53537023f, 1.64308119f, 0.62742490f, + 1.31600118f, 0.37510848f, 0.57869565f, 0.55962265f, 0.82500041f, 0.60776925f, 0.19367239f, 0.88382334f, + 0.20328504f, 0.58192456f, 0.94542676f, 0.98562658f, 1.33183134f, 1.70965421f, 1.70983100f, 1.76660407f, + 1.46399045f, 1.70318413f, 0.83565855f, 1.37921953f, 0.64355153f, 0.69856495f, 0.30377558f, 0.02857198f, + 0.96969068f, 0.48450547f, 0.98341352f, 0.03546083f, 0.84963584f, 0.94460547f, 1.60677671f, 
0.53308368f, + 1.60789728f, 0.56227136f, 1.50563633f, 0.50456268f, 1.49554634f, 0.48299593f, 0.90907097f, 0.22525074f, + 0.12530145f, 0.52223104f, 0.09549426f, 0.93127102f, 0.93429947f, 0.01428344f, 0.74249738f, 0.22606593f, + 1.59781134f, 2.01703453f, 1.58993423f, 1.78536010f, 1.21809304f, 1.69219351f, 1.24090374f, 1.75499403f, }; test_case.add_input(Shape{2, 4, 3}, input); @@ -1088,7 +1106,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { test_case.add_input(Shape{2, 2, 2, 5, 2}, past); test_case.add_expected_output(Shape{2, 4, 4}, output); test_case.add_expected_output(Shape{2, 2, 2, 9, 2}, present); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fusedgemm_abc) { @@ -1098,67 +1116,68 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fusedgemm_abc) { auto test_case = test::TestCase(function, s_device); std::vector inputA = { - 0.760289272, - 0.155913759, - 0.781790674, - -0.916164881, - -0.599392663, - 0.264654594, - 0.793851873, - 0.177088557, - 0.082737454, - 0.070692121, - -0.811413035, - -0.098108588, - 0.650090827, - -0.987659751, - -0.815909968, - -0.375566031, - -0.192777789, - -0.843511765, + 0.760289272f, + 0.155913759f, + 0.781790674f, + -0.916164881f, + -0.599392663f, + 0.264654594f, + 0.793851873f, + 0.177088557f, + 0.082737454f, + 0.070692121f, + -0.811413035f, + -0.098108588f, + 0.650090827f, + -0.987659751f, + -0.815909968f, + -0.375566031f, + -0.192777789f, + -0.843511765f, }; std::vector inputB = { - -0.599338344, -0.893724541, -0.362130441, -0.510642812, -0.943908814, -0.247790266, -0.732624930, 0.660286910, - -0.264866660, -0.907203793, 0.339617010, -0.322529173, 0.714601048, 0.581729832, -0.609115490, -0.369882312, - -0.462432785, -0.554824440, -0.833489997, -0.899945507, -0.088337136, -0.253637339, -0.443307744, -0.677004897, + -0.599338344f, -0.893724541f, -0.362130441f, -0.510642812f, -0.943908814f, -0.247790266f, + -0.732624930f, 0.660286910f, -0.264866660f, -0.907203793f, 0.339617010f, -0.322529173f, + 0.714601048f, 0.581729832f, -0.609115490f, -0.369882312f, -0.462432785f, -0.554824440f, + -0.833489997f, -0.899945507f, -0.088337136f, -0.253637339f, -0.443307744f, -0.677004897f, }; std::vector inputC = { - -0.540039918, - -0.235745675, - -0.337291175, - -0.702340580, - 0.532629731, - -0.794515569, - -0.532012999, - 0.372558416, - 0.582367524, - -0.483044018, - 0.656635884, - -0.655929499, + -0.540039918f, + -0.235745675f, + -0.337291175f, + -0.702340580f, + 0.532629731f, + -0.794515569f, + -0.532012999f, + 0.372558416f, + 0.582367524f, + -0.483044018f, + 0.656635884f, + -0.655929499f, }; std::vector output = { - -8.75421E-05, - -9.65321E-05, - 0.239491309, - -2.70329E-05, - 0.151090653, - -5.53371E-05, - -1.22197E-05, - 0.413963711, - 0.618195780, - 0.011654445, - 0.815541101, - -2.46706E-05, + -8.75421E-05f, + -9.65321E-05f, + 0.239491309f, + -2.70329E-05f, + 0.151090653f, + -5.53371E-05f, + -1.22197E-05f, + 0.413963711f, + 0.618195780f, + 0.011654445f, + 0.815541101f, + -2.46706E-05f, }; test_case.add_input(Shape{3, 6}, inputA); test_case.add_input(Shape{6, 4}, inputB); test_case.add_input(Shape{3, 4}, inputC); test_case.add_expected_output(Shape{3, 4}, output); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_com_microsoft_fused_conv_hard_sigmoid) { diff --git a/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp b/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp index 
1c9a1054bf674d..a6c151d0ee526d 100644
--- a/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp
+++ b/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp
@@ -56,7 +56,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_const_folding_model_scatter_elements) {
                                                   SERIALIZED_ZOO,
                                                   "onnx/scatter_elements_opset11.onnx"));
 
-    test_constant_folding(fn, {1.0, 1.1, 3.0, 2.1, 5.0}, Shape{1, 5});
+    test_constant_folding(fn, {1.0f, 1.1f, 3.0f, 2.1f, 5.0f}, Shape{1, 5});
 }
 
 NGRAPH_TEST(${BACKEND_NAME}, onnx_const_folding_model_non_zero_scalar) {
diff --git a/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp b/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp
index 0c1d0648d8abee..3cf38e223d07ed 100644
--- a/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp
+++ b/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp
@@ -537,7 +537,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_same_inputs) {
     auto test_case = test::TestCase(function, s_device);
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
 
     // condition
     test_case.add_input<bool>({true});
@@ -577,7 +577,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_different_inputs) {
     auto test_case = test::TestCase(function, s_device);
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
 
     // condition
     test_case.add_input<bool>({true});
@@ -649,7 +649,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_inside_if) {
     // expected value == x * y
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
     std::vector<float> expected;
     std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float {
         return i * j;
@@ -663,7 +663,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_inside_if) {
     // case when condition == true and all(x < y)
     // expected value == x + y
     std::iota(x.begin(), x.end(), -static_cast<float>(x.size()));
-    std::iota(y.begin(), y.end(), 1);
+    std::iota(y.begin(), y.end(), 1.f);
     std::transform(x.begin(), x.end(), y.begin(), expected.begin(), [](float i, float j) -> float {
         return i + j;
     });
@@ -703,13 +703,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_multiple_outputs) {
     // case when condition == true so split is along axis 0
     std::vector<float> x(36);
-    std::iota(x.begin(), x.end(), 0);
+    std::iota(x.begin(), x.end(), 0.f);
     std::vector<float> expected1(12);
-    std::iota(expected1.begin(), expected1.end(), 0);
+    std::iota(expected1.begin(), expected1.end(), 0.f);
     std::vector<float> expected2(12);
-    std::iota(expected2.begin(), expected2.end(), 12);
+    std::iota(expected2.begin(), expected2.end(), 12.f);
     std::vector<float> expected3(12);
-    std::iota(expected3.begin(), expected3.end(), 24);
+    std::iota(expected3.begin(), expected3.end(), 24.f);
 
     test_case.add_input<bool>({true});  // condition
     test_case.add_input(x);
     test_case.add_expected_output(expected1);
@@ -768,9 +768,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_with_only_indentity_in_else_branch) {
     auto test_case = test::TestCase(function, s_device);
 
     std::vector<float> x(shape_size(Shape{1, 5, 2, 2}));
-    std::iota(x.begin(), x.end(), 0);
-    std::vector<float> expected{1.333333, 3, 4.666666, 6.333333, 8, 10, 12, 14, 16, 18,
-                                20, 22, 24, 26, 28, 30, 25.33333, 27, 28.666667, 30.33333};
+    std::iota(x.begin(), x.end(), 0.f);
+    std::vector<float> expected{1.333333f, 3.f, 4.666666f, 6.333333f, 8.f, 10.f, 12.f,
+                                14.f, 16.f, 18.f, 20.f, 22.f, 24.f, 26.f,
+                                28.f, 30.f, 25.33333f, 27.f, 28.666667f, 30.33333f};
     test_case.add_input(x);
     test_case.add_expected_output(expected);
     test_case.run();
@@ -820,7 +821,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_dynamic_inputs) {
     auto test_case = test::TestCase(function, s_device);
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
     std::vector<float> expected;
     std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float {
         return i + j;
diff --git a/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp b/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp
index 2afbdef3999159..dfa888bb943ce9 100644
--- a/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp
+++ b/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp
@@ -220,7 +220,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_model_asinh_3_2) {
     auto test_case = test::TestCase(function, s_device);
     test_case.add_input<float>(Shape{3, 2}, {-1.5f, 0.0f, 1.5f, -1.5f, 0.0f, 1.5f});
-    test_case.add_expected_output<float>(Shape{3, 2}, {-1.1947632f, 0.0f, 1.1947632f, -1.1947632, 0.0f, 1.1947632f});
+    test_case.add_expected_output<float>(Shape{3, 2}, {-1.1947632f, 0.0f, 1.1947632f, -1.1947632f, 0.0f, 1.1947632f});
     test_case.run();
 }
 
@@ -713,7 +713,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_transpose) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(std::begin(input_values), std::end(input_values), 1);
+    std::iota(std::begin(input_values), std::end(input_values), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -729,9 +729,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_transpose) {
 
 namespace {
 Shape get_flattened_shape(const Shape& in_shape, size_t axis) {
-    size_t first_dim_size =
-        std::accumulate(begin(in_shape), next(begin(in_shape), axis), 1UL, std::multiplies<size_t>());
-    size_t last_dim_size = std::accumulate(next(begin(in_shape), axis), end(in_shape), 1UL, std::multiplies<size_t>());
+    size_t first_dim_size = std::accumulate(begin(in_shape),
+                                            next(begin(in_shape), axis),
+                                            static_cast<size_t>(1),
+                                            std::multiplies<size_t>());
+    size_t last_dim_size =
+        std::accumulate(next(begin(in_shape), axis), end(in_shape), static_cast<size_t>(1), std::multiplies<size_t>());
     return Shape{first_dim_size, last_dim_size};
 }
 }  // namespace
@@ -751,7 +754,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_axis_0) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(input_values.begin(), input_values.end(), 1);
+    std::iota(input_values.begin(), input_values.end(), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -778,7 +781,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_axis) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(input_values.begin(), input_values.end(), 1);
+    std::iota(input_values.begin(), input_values.end(), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -805,7 +808,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_neg_axis) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(input_values.begin(), input_values.end(), 1);
+    std::iota(input_values.begin(), input_values.end(), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -907,7 +910,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input) {
     const Shape input_shape{3, 4, 1};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({2, 3});
@@ -926,7 +929,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_neg_axes) {
     const Shape input_shape{3, 4, 1};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({2, 3});
@@ -945,7 +948,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_12_axes) {
     const Shape input_shape{4, 3, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({2, 1});
@@ -963,7 +966,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_20_axes) {
     const Shape input_shape{4, 3, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_shape, input_values);
     test_case.add_input<int64_t>({0, 1});
     test_case.add_input<int64_t>({1, 3});
@@ -982,7 +985,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_23_axes) {
     const Shape input_shape{2, 2, 2, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({1, 1});
@@ -1000,7 +1003,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_0231_axes_ends_ma
     const Shape input_shape{2, 2, 2, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 1, 1, 0});
     test_case.add_input<int64_t>({std::numeric_limits<int64_t>::max(),
@@ -1021,7 +1024,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_2103_axes_ends_ma
     const Shape input_shape{2, 2, 2, 5};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({1, 0, 0, 1});
     test_case.add_input<int64_t>({2,
@@ -1043,7 +1046,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_23_axes_21_steps)
     const Shape input_shape{2, 2, 6, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 1});
     test_case.add_input<int64_t>({5, 2});
@@ -1060,7 +1063,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_default_axes) {
     const Shape input_shape{4, 3, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({1, 1, 1});
     test_case.add_input<int64_t>({2, 2, 2});
@@ -1116,34 +1119,34 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_model_softmax_axis_2) {
         file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softmax_axis_2.onnx"));
 
     const std::vector<float> input = {
-        2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118,
0.21004745, 1.38337255, - 1.19030397, 2.0940445, -0.03551657, -0.78686039, 1.992782, 0.04300319, -0.29230777, - -0.56797112, -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, + 2.75793882f, -0.50841322f, 0.82013929f, -0.62409912f, -0.96136118f, 0.21004745f, 1.38337255f, + 1.19030397f, 2.0940445f, -0.03551657f, -0.78686039f, 1.992782f, 0.04300319f, -0.29230777f, + -0.56797112f, -1.26732165f, -0.61935399f, 0.57670432f, 0.92844898f, 2.82469233f, - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, -0.13259761, -1.14313018, - 0.2673723, -0.87996154, 1.29053106, 1.55, 0.8396538, 1.20729817, 0.23727845, - -0.89113606, -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, + 0.98721677f, -0.05100663f, -1.21178917f, -0.17530157f, 1.40051805f, -0.13259761f, -1.14313018f, + 0.2673723f, -0.87996154f, 1.29053106f, 1.55f, 0.8396538f, 1.20729817f, 0.23727845f, + -0.89113606f, -1.70909842f, 0.26460363f, -0.70566808f, 2.383518f, 1.07024615f, - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, 0.75425957, -2.43721014, - -1.24478184, 2.65316853, 1.19509542, -0.95523998, 0.5149006, -0.01151649, 0.68327026, - -0.4589638, -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + -1.21722605f, 0.82919357f, 0.55765697f, 0.12657686f, 0.63432172f, 0.75425957f, -2.43721014f, + -1.24478184f, 2.65316853f, 1.19509542f, -0.95523998f, 0.5149006f, -0.01151649f, 0.68327026f, + -0.4589638f, -0.46554745f, 0.21055324f, 0.39266729f, 2.05098086f, 1.83207919f}; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619486, 0.03075257, 0.1161086, 0.027393, 0.01955098, 0.07012682, 0.22670066, - 0.18689779, 0.4614171, 0.05485763, 0.04486172, 0.72286838, 0.10286818, 0.07356265, - 0.05583908, 0.01280724, 0.02448298, 0.08096658, 0.11509768, 0.76664552, + {0.80619486f, 0.03075257f, 0.1161086f, 0.027393f, 0.01955098f, 0.07012682f, 0.22670066f, + 0.18689779f, 0.4614171f, 0.05485763f, 0.04486172f, 0.72286838f, 0.10286818f, 0.07356265f, + 0.05583908f, 0.01280724f, 0.02448298f, 0.08096658f, 0.11509768f, 0.76664552f, - 0.30399806, 0.1076406, 0.03371745, 0.0950595, 0.4595844, 0.13369873, 0.04866969, - 0.19944906, 0.06332151, 0.55486101, 0.39101105, 0.19217177, 0.27755913, 0.10521588, - 0.03404216, 0.01150354, 0.08279411, 0.03137732, 0.68902071, 0.18530432, + 0.30399806f, 0.1076406f, 0.03371745f, 0.0950595f, 0.4595844f, 0.13369873f, 0.04866969f, + 0.19944906f, 0.06332151f, 0.55486101f, 0.39101105f, 0.19217177f, 0.27755913f, 0.10521588f, + 0.03404216f, 0.01150354f, 0.08279411f, 0.03137732f, 0.68902071f, 0.18530432f, - 0.0402528, 0.31156222, 0.23747503, 0.1543129, 0.25639705, 0.10627912, 0.00436928, - 0.01439711, 0.70979614, 0.16515835, 0.06798343, 0.2957175, 0.17468555, 0.34994439, - 0.11166912, 0.03615172, 0.07108136, 0.08527994, 0.44775794, 0.35972905}); + 0.0402528f, 0.31156222f, 0.23747503f, 0.1543129f, 0.25639705f, 0.10627912f, 0.00436928f, + 0.01439711f, 0.70979614f, 0.16515835f, 0.06798343f, 0.2957175f, 0.17468555f, 0.34994439f, + 0.11166912f, 0.03615172f, 0.07108136f, 0.08527994f, 0.44775794f, 0.35972905f}); test_case.run(3); } @@ -1184,7 +1187,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization_dyn_shape) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -1207,7 +1210,7 @@ NGRAPH_TEST(${BACKEND_NAME}, 
onnx_model_instance_normalization_dyn_shape2) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -1275,7 +1278,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_1_3d_input_21_axes_ends_max) const Shape input_shape{1, 2, 3, 4}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_shape, input_values); test_case.add_expected_output(Shape{1, 1, 3, 3}, {13, 14, 15, 17, 18, 19, 21, 22, 23}); test_case.run(); @@ -1315,7 +1318,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_max_pool_dyn_rank_without_default_attrs) Shape input_shape{1, 1, 4, 4}; std::vector input(shape_size(input_shape)); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); test_case.add_input(input_shape, input); test_case.add_expected_output(Shape{1, 1, 3, 3}, {5, 6, 7, 9, 10, 11, 13, 14, 15}); test_case.run(); @@ -1327,7 +1330,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_dynamic_input) { "onnx/dynamic_shapes/depth_to_space.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -1345,7 +1348,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_space_to_depth_dynamic_input) { "onnx/dynamic_shapes/space_to_depth.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{ 0.f, 2.f, 8.f, 10.f, 16.f, 18.f, 24.f, 26.f, 1.f, 3.f, 9.f, 11.f, 17.f, 19.f, 25.f, 27.f, diff --git a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp index fe85ef213db8ae..909eb4bb7bf2fb 100644 --- a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp @@ -57,14 +57,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_prior_box) { std::vector A(3 * 2 * 2); std::vector B(3 * 6 * 6); std::vector output = { - -2.3200002, -2.3200002, 3.6533334, 3.6533334, -3.7053659, -3.7053659, 5.0386992, 5.0386992, - -0.98666668, -2.3200002, 4.9866667, 3.6533334, -2.3720326, -3.7053659, 6.3720322, 5.0386992, - -2.3200002, -0.98666668, 3.6533334, 4.9866667, -3.7053659, -2.3720326, 5.0386992, 6.3720322, - -0.98666668, -0.98666668, 4.9866667, 4.9866667, -2.3720326, -2.3720326, 6.3720322, 6.3720322, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, + -2.3200002f, -2.3200002f, 3.6533334f, 3.6533334f, -3.7053659f, -3.7053659f, 5.0386992f, 5.0386992f, + -0.98666668f, -2.3200002f, 4.9866667f, 3.6533334f, -2.3720326f, -3.7053659f, 6.3720322f, 5.0386992f, + -2.3200002f, -0.98666668f, 3.6533334f, 4.9866667f, -3.7053659f, -2.3720326f, 5.0386992f, 6.3720322f, + -0.98666668f, -0.98666668f, 4.9866667f, 4.9866667f, -2.3720326f, -2.3720326f, 6.3720322f, 6.3720322f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, }; test_case.add_input(A); 
test_case.add_input(B); @@ -78,11 +78,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_priorbox_clustered) { "onnx/priorbox_clustered.onnx")); auto test_case = test::TestCase(function, s_device); - std::vector A{15.0}; - std::vector B{10.0}; + std::vector A{15.0f}; + std::vector B{10.0f}; std::vector output = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, }; test_case.add_input(A); test_case.add_input(B); @@ -101,22 +101,22 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_priorbox_clustered_most_attrs_default) { std::iota(std::begin(A), std::end(A), 0.0f); std::vector B(1 * 1 * 3 * 3); std::iota(std::begin(B), std::end(B), 0.0f); - std::vector output = {-0.1666666716337203979, - -0.1666666716337203979, - 0.1666666716337203979, - 0.1666666716337203979, - -0.1666666716337203979, - 0.3333333432674407959, - 0.1666666716337203979, - 0.6666666865348815918, - 0.1, - 0.1, - 0.2, - 0.2, - 0.1, - 0.1, - 0.2, - 0.2}; + std::vector output = {-0.1666666716337203979f, + -0.1666666716337203979f, + 0.1666666716337203979f, + 0.1666666716337203979f, + -0.1666666716337203979f, + 0.3333333432674407959f, + 0.1666666716337203979f, + 0.6666666865348815918f, + 0.1f, + 0.1f, + 0.2f, + 0.2f, + 0.1f, + 0.1f, + 0.2f, + 0.2f}; test_case.add_input(A); test_case.add_input(B); test_case.add_expected_output(Shape{1, 2, 8}, output); @@ -170,11 +170,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_detection_output) { std::vector logits = gen_vector(12, -2, 2); std::vector class_preds = gen_vector(9, 0, 1); std::vector proposals = gen_vector(12 * 2, 0, 1); - std::vector output = {0, 1, 0.777778, 0.279849, 0.283779, 0.562743, 0.695387, - 0, 1, 0.444444, 0.12963, 0.176075, 0.212963, 0.284573, - 0, 2, 0.888889, 0.279849, 0.283779, 0.562743, 0.695387, - 0, 2, 0.555556, 0.12963, 0.176075, 0.212963, 0.284573, - 0, 2, 0.222222, -0.0608094, -0.0142007, -0.0225239, 0.0304044}; + std::vector output = {0, 1, 0.777778f, 0.279849f, 0.283779f, 0.562743f, 0.695387f, + 0, 1, 0.444444f, 0.12963f, 0.176075f, 0.212963f, 0.284573f, + 0, 2, 0.888889f, 0.279849f, 0.283779f, 0.562743f, 0.695387f, + 0, 2, 0.555556f, 0.12963f, 0.176075f, 0.212963f, 0.284573f, + 0, 2, 0.222222f, -0.0608094f, -0.0142007f, -0.0225239f, 0.0304044f}; test_case.add_input(logits); test_case.add_input(class_preds); test_case.add_input(proposals); @@ -188,18 +188,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_group_norm) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/group_norm.onnx")); auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 2, 2}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); + std::iota(data.begin(), data.end(), 0.f); std::vector output = { - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 
4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, }; test_case.add_input(data); @@ -212,15 +212,16 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_group_norm_5d) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/group_norm_5d.onnx")); auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 1, 2, 1}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); - std::vector output = { - -0.34163546562, 0.55278813838, 2.89442372322, 4.68327093124, -1.02490639686, 1.65836453437, 5.78884744644, - 9.36654186248, -1.70817732810, 2.76394081115, 8.68327140808, 14.04981231689, -2.39144825935, 3.86951708793, - 11.57769489288, 18.73308372497, -0.34163546562, 0.55278813838, 2.89442372322, 4.68327093124, -1.02490639686, - 1.65836453437, 5.78884744644, 9.36654186248, -1.70817732810, 2.76394081115, 8.68327140808, 14.04981231689, - -2.39144825935, 3.86951708793, 11.57769489288, 18.73308372497}; + std::iota(data.begin(), data.end(), 0.f); + std::vector output = {-0.34163546562f, 0.55278813838f, 2.89442372322f, 4.68327093124f, -1.02490639686f, + 1.65836453437f, 5.78884744644f, 9.36654186248f, -1.70817732810f, 2.76394081115f, + 8.68327140808f, 14.04981231689f, -2.39144825935f, 3.86951708793f, 11.57769489288f, + 18.73308372497f, -0.34163546562f, 0.55278813838f, 2.89442372322f, 4.68327093124f, + -1.02490639686f, 1.65836453437f, 5.78884744644f, 9.36654186248f, -1.70817732810f, + 2.76394081115f, 8.68327140808f, 14.04981231689f, -2.39144825935f, 3.86951708793f, + 11.57769489288f, 18.73308372497f}; test_case.add_input(data); test_case.add_expected_output(shape, output); @@ -232,20 +233,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_normalize) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/normalize.onnx")); auto test_case = test::TestCase(function, s_device); std::vector data(12); - std::iota(data.begin(), data.end(), 1); + std::iota(data.begin(), data.end(), 1.f); std::vector output = { - 0.19334731, - 0.33806169, - 0.44846106, - 0.53452247, - 1.4501048, - 1.5212777, - 1.5696137, - 1.6035674, - 3.4802516, - 3.3806169, - 3.2887144, - 3.2071347, + 0.19334731f, + 0.33806169f, + 0.44846106f, + 0.53452247f, + 1.4501048f, + 1.5212777f, + 1.5696137f, + 1.6035674f, + 3.4802516f, + 3.3806169f, + 3.2887144f, + 3.2071347f, }; test_case.add_input(data); test_case.add_expected_output(Shape{1, 3, 2, 2}, output); @@ -260,7 +261,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_swish_with_beta) { auto test_case = test::TestCase(function, s_device); 
std::vector input_data{-0.5f, 0, 0.5f}; test_case.add_input(input_data); - test_case.add_expected_output(expected_output_shape, {-0.2036667, 0.0, 0.2963333}); + test_case.add_expected_output(expected_output_shape, {-0.2036667f, 0.0f, 0.2963333f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -274,7 +275,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_swish_without_beta) { auto test_case = test::TestCase(function, s_device); std::vector input_data{-0.5f, 0, 0.5f}; test_case.add_input(input_data); - test_case.add_expected_output(expected_output_shape, {-0.18877034, 0.0, 0.31122968}); + test_case.add_expected_output(expected_output_shape, {-0.18877034f, 0.0f, 0.31122968f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -313,9 +314,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_detection_output) test_case.add_expected_output(Shape{5, 4}, { 0.8929862f, - 0.892986297607421875, - 12.10701370239257812, - 12.10701370239257812, + 0.892986297607421875f, + 12.10701370239257812f, + 12.10701370239257812f, 0, 0.0f, 0.0f, @@ -440,18 +441,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_group_norm) { auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 2, 2}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); + std::iota(data.begin(), data.end(), 0.f); std::vector output = { - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, }; test_case.add_input(data); @@ -468,13 +469,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_prior_grid_genera auto test_case = test::TestCase(function, s_device); std::vector priors(shape_size(Shape{3, 4})); - std::iota(priors.begin(), priors.end(), 0); + std::iota(priors.begin(), priors.end(), 0.f); std::vector feature_map(shape_size(Shape{1, 1, 1, 3})); - std::iota(feature_map.begin(), feature_map.end(), 0); + std::iota(feature_map.begin(), feature_map.end(), 0.f); std::vector im_data(shape_size(Shape{1, 3, 4, 7})); - std::iota(im_data.begin(), im_data.end(), 0); + std::iota(im_data.begin(), 
im_data.end(), 0.f); test_case.add_input(priors); test_case.add_input(feature_map); @@ -495,51 +496,51 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_roi_feature_extra auto test_case = test::TestCase(function, s_device); std::vector rois(shape_size(Shape{2, 4})); - std::iota(rois.begin(), rois.end(), 0); + std::iota(rois.begin(), rois.end(), 0.f); std::vector pyramid_layer_0(shape_size(Shape{1, 2, 2, 3})); - std::iota(pyramid_layer_0.begin(), pyramid_layer_0.end(), 0); + std::iota(pyramid_layer_0.begin(), pyramid_layer_0.end(), 0.f); test_case.add_input(rois); test_case.add_input(pyramid_layer_0); test_case.add_expected_output(Shape{2, 2, 3, 3}, - {1.416666746139526367, - 1.750000119209289551, - 2.083333492279052734, - 2.416666746139526367, - 2.75, - 3.083333492279052734, - 3.166666507720947266, - 3.5, - 3.833333492279052734, - 7.416666507720947266, - 7.75, - 8.083333015441894531, - 8.416666984558105469, - 8.75, - 9.083333969116210938, - 9.166666030883789062, - 9.5, - 9.833333969116210938, - 4.166666984558105469, - 4.5, - 4.833333492279052734, - 4.166666984558105469, - 4.5, - 4.833333492279052734, - 2.083333492279052734, - 2.25, - 2.416666746139526367, - 10.16666603088378906, - 10.5, - 10.83333206176757812, - 10.16666603088378906, - 10.5, - 10.83333206176757812, - 5.083333015441894531, - 5.25, - 5.416666507720947266}); + {1.416666746139526367f, + 1.750000119209289551f, + 2.083333492279052734f, + 2.416666746139526367f, + 2.75f, + 3.083333492279052734f, + 3.166666507720947266f, + 3.5f, + 3.833333492279052734f, + 7.416666507720947266f, + 7.75f, + 8.083333015441894531f, + 8.416666984558105469f, + 8.75f, + 9.083333969116210938f, + 9.166666030883789062f, + 9.5f, + 9.833333969116210938f, + 4.166666984558105469f, + 4.5f, + 4.833333492279052734f, + 4.166666984558105469f, + 4.5f, + 4.833333492279052734f, + 2.083333492279052734f, + 2.25f, + 2.416666746139526367f, + 10.16666603088378906f, + 10.5f, + 10.83333206176757812f, + 10.16666603088378906f, + 10.5f, + 10.83333206176757812f, + 5.083333015441894531f, + 5.25f, + 5.416666507720947266f}); test_case.add_expected_output(Shape{2, 4}, {0, 1, 2, 3, 4, 5, 6, 7}); test_case.run(); @@ -593,29 +594,32 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_generate_proposals) { // scores test_case.add_input( Shape{1, 3, 2, 6}, - {0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508, 0.14516132, 0.08081771, 0.1799732, - 0.9229515, 0.42420176, 0.50857586, 0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355, - 0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753, 0.5256446, 0.03342898, 0.1344396, - 0.68642473, 0.37953874, 0.32575172, 0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}); + {0.56637216f, 0.90457034f, 0.69827306f, 0.4353543f, 0.47985056f, 0.42658508f, 0.14516132f, 0.08081771f, + 0.1799732f, 0.9229515f, 0.42420176f, 0.50857586f, 0.82664067f, 0.4972319f, 0.3752427f, 0.56731623f, + 0.18241242f, 0.33252355f, 0.30608943f, 0.6572437f, 0.69185436f, 0.88646156f, 0.36985755f, 0.5590753f, + 0.5256446f, 0.03342898f, 0.1344396f, 0.68642473f, 0.37953874f, 0.32575172f, 0.21108444f, 0.5661886f, + 0.45378175f, 0.62126315f, 0.26799858f, 0.37272978f}); // deltas test_case.add_input( Shape{1, 12, 2, 6}, - {0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678, 0.5157072, 0.9932138, 0.7565954, - 0.43803605, 0.802818, 0.14834064, 0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484, - 0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538, 0.95615596, 
0.5651084, 0.11801951, - 0.05352486, 0.9774733, 0.14439464, 0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179, - 0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008, 0.39688024, 0.3286012, 0.5117038, - 0.04743988, 0.9408995, 0.29885054, 0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331, - 0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888, 0.27721855, 0.6378373, 0.22795396, - 0.4961256, 0.3215895, 0.15607187, 0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525, - 0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824, 0.20689541, 0.37622106, 0.1083321, - 0.1342548, 0.05815459, 0.7676379, 0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908, - 0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993, 0.05307213, 0.39705503, 0.21003333, - 0.5625373, 0.66188884, 0.80521655, 0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654, - 0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776, 0.36377597, 0.7891322, 0.40900692, - 0.99139464, 0.50169915, 0.41435778, 0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711, - 0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132, 0.97486794, 0.2921785, 0.7633071, - 0.88477814, 0.03563205, 0.50833166, 0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}); + {0.5337073f, 0.86607957f, 0.55151343f, 0.21626699f, 0.4462629f, 0.03985678f, 0.5157072f, 0.9932138f, + 0.7565954f, 0.43803605f, 0.802818f, 0.14834064f, 0.53932905f, 0.14314f, 0.3817048f, 0.95075196f, + 0.05516243f, 0.2567484f, 0.25508744f, 0.77438325f, 0.43561f, 0.2094628f, 0.8299043f, 0.44982538f, + 0.95615596f, 0.5651084f, 0.11801951f, 0.05352486f, 0.9774733f, 0.14439464f, 0.62644225f, 0.14370479f, + 0.54161614f, 0.557915f, 0.53102225f, 0.0840179f, 0.7249888f, 0.9843559f, 0.5490522f, 0.53788143f, + 0.822474f, 0.3278008f, 0.39688024f, 0.3286012f, 0.5117038f, 0.04743988f, 0.9408995f, 0.29885054f, + 0.81039643f, 0.85277915f, 0.06807619f, 0.86430097f, 0.36225632f, 0.16606331f, 0.5401001f, 0.7541649f, + 0.11998601f, 0.5131829f, 0.40606487f, 0.327888f, 0.27721855f, 0.6378373f, 0.22795396f, 0.4961256f, + 0.3215895f, 0.15607187f, 0.14782153f, 0.8908137f, 0.8835288f, 0.834191f, 0.29907143f, 0.7983525f, + 0.755875f, 0.30837986f, 0.0839176f, 0.26624718f, 0.04371626f, 0.09472824f, 0.20689541f, 0.37622106f, + 0.1083321f, 0.1342548f, 0.05815459f, 0.7676379f, 0.8105144f, 0.92348766f, 0.26761323f, 0.7183306f, + 0.8947588f, 0.19020908f, 0.42731014f, 0.7473663f, 0.85775334f, 0.9340091f, 0.3278848f, 0.755993f, + 0.05307213f, 0.39705503f, 0.21003333f, 0.5625373f, 0.66188884f, 0.80521655f, 0.6125863f, 0.44678232f, + 0.97802377f, 0.0204936f, 0.02686367f, 0.7390654f, 0.74631f, 0.58399844f, 0.5988792f, 0.37413648f, + 0.5946692f, 0.6955776f, 0.36377597f, 0.7891322f, 0.40900692f, 0.99139464f, 0.50169915f, 0.41435778f, + 0.17142445f, 0.26761186f, 0.31591868f, 0.14249913f, 0.12919712f, 0.5418711f, 0.6523203f, 0.50259084f, + 0.7379765f, 0.01171071f, 0.94423133f, 0.00841132f, 0.97486794f, 0.2921785f, 0.7633071f, 0.88477814f, + 0.03563205f, 0.50833166f, 0.01354555f, 0.535081f, 0.41366324f, 0.0694767f, 0.9944055f, 0.9981207f}); // im_info test_case.add_input(Shape{1, 3}, {200, 200, 0}); // anchors @@ -623,11 +627,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_generate_proposals) { test_case.add_expected_output( Shape{6, 4}, - {0.12904608, 1.3703424, 3.6230984, 3.4675088, 0.9725206, 0., 4.4917974, 4.9623675, - 4.882682, 5.1236916, 7.1700497, 10.213073, 
4.4913187, 4.305372, 8.750267, 8.803502, - 0.9777608, 1.0317986, 3.228293, 4.495021, 4.125554, 5.4091997, 6.35439, 10.124915}); + {0.12904608f, 1.3703424f, 3.6230984f, 3.4675088f, 0.9725206f, 0., 4.4917974f, 4.9623675f, + 4.882682f, 5.1236916f, 7.1700497f, 10.213073f, 4.4913187f, 4.305372f, 8.750267f, 8.803502f, + 0.9777608f, 1.0317986f, 3.228293f, 4.495021f, 4.125554f, 5.4091997f, 6.35439f, 10.124915f}); test_case.add_expected_output(Shape{6}, - {0.9229515, 0.90457034, 0.88646156, 0.82664067, 0.69827306, 0.69185436}); + {0.9229515f, 0.90457034f, 0.88646156f, 0.82664067f, 0.69827306f, 0.69185436f}); test_case.add_expected_output(Shape{1}, {6}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp index 110fbe656614de..d57b9c6ad64b32 100644 --- a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp @@ -34,23 +34,23 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_adaptive_avg_pooling2d_nchw) { "onnx/org.pytorch/adaptive_avg_pooling2d_nchw.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.9945, - 0.3466, - 0.2894, - 0.9318, - 0.0115, - 0.4867, - 0.7608, - 0.1550, - 0.8485, - 0.4971, - 0.8833, - 0.4579, - 0.3673, - 0.5410, - 0.2004, - 0.1519}); - test_case.add_expected_output(Shape{1, 1, 2, 2}, {0.4598249, 0.5342500, 0.5634750, 0.4233750}); + test_case.add_input({0.9945f, + 0.3466f, + 0.2894f, + 0.9318f, + 0.0115f, + 0.4867f, + 0.7608f, + 0.1550f, + 0.8485f, + 0.4971f, + 0.8833f, + 0.4579f, + 0.3673f, + 0.5410f, + 0.2004f, + 0.1519f}); + test_case.add_expected_output(Shape{1, 1, 2, 2}, {0.4598249f, 0.5342500f, 0.5634750f, 0.4233750f}); test_case.run(); } @@ -61,8 +61,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_adaptive_avg_pooling2d_chw) { "onnx/org.pytorch/adaptive_avg_pooling2d_chw.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({12.0, -1.0, -56.0, 20.0, 1.0, -8.0, 7.0, 9.0}); + test_case.add_input({12.0f, -1.0f, -56.0f, 20.0f, 1.0f, -8.0f, 7.0f, 9.0f}); - test_case.add_expected_output(Shape{1, 2, 2}, {5.5, -18.0, -3.5, 8.0}); + test_case.add_expected_output(Shape{1, 2, 2}, {5.5f, -18.0f, -3.5f, 8.0f}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_quant.in.cpp b/src/frontends/onnx/tests/onnx_import_quant.in.cpp index 784bb8a0ae2ad3..d49b96c22b7f91 100644 --- a/src/frontends/onnx/tests/onnx_import_quant.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_quant.in.cpp @@ -1061,7 +1061,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fake_quantize_const_inputs_infer) { const Shape data_shape{1, 2, 3, 4}; const auto n_elements = shape_size(data_shape); std::vector input_data(n_elements); - std::iota(std::begin(input_data), std::end(input_data), 0); + std::iota(std::begin(input_data), std::end(input_data), 0.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(input_data); @@ -1081,7 +1081,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fake_quantize_nonconst_inputs_infer) { const Shape data_shape{1, 2, 3, 4}; const size_t n_elements = shape_size(data_shape); std::vector input_data(n_elements); - std::iota(std::begin(input_data), std::end(input_data), 0); + std::iota(std::begin(input_data), std::end(input_data), 0.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(input_data); diff --git a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp index 
771000f8091db8..645ccd8cc4cea7 100644 --- a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp @@ -129,29 +129,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_reshape_negative_dim) { "onnx/reshape_negative_dim.onnx")); // 2x3x4 - auto input = test::NDArray({{{0.5488135, 0.71518934, 0.60276335, 0.5448832}, - {0.4236548, 0.6458941, 0.4375872, 0.891773}, - {0.96366274, 0.3834415, 0.79172504, 0.5288949}}, + auto input = test::NDArray({{{0.5488135f, 0.71518934f, 0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f, 0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f, 0.79172504f, 0.5288949f}}, - {{0.56804454, 0.92559665, 0.07103606, 0.0871293}, - {0.0202184, 0.83261985, 0.77815676, 0.87001216}, - {0.9786183, 0.7991586, 0.46147937, 0.7805292}}}) + {{0.56804454f, 0.92559665f, 0.07103606f, 0.0871293f}, + {0.0202184f, 0.83261985f, 0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f, 0.46147937f, 0.7805292f}}}) .get_vector(); // 2x6x2 - auto expected_output = test::NDArray({{{0.5488135, 0.71518934}, - {0.60276335, 0.5448832}, - {0.4236548, 0.6458941}, - {0.4375872, 0.891773}, - {0.96366274, 0.3834415}, - {0.79172504, 0.5288949}}, - - {{0.56804454, 0.92559665}, - {0.07103606, 0.0871293}, - {0.0202184, 0.83261985}, - {0.77815676, 0.87001216}, - {0.9786183, 0.7991586}, - {0.46147937, 0.7805292}}}) + auto expected_output = test::NDArray({{{0.5488135f, 0.71518934f}, + {0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f}, + {0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f}, + {0.79172504f, 0.5288949f}}, + + {{0.56804454f, 0.92559665f}, + {0.07103606f, 0.0871293f}, + {0.0202184f, 0.83261985f}, + {0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f}, + {0.46147937f, 0.7805292f}}}) .get_vector(); auto test_case = test::TestCase(function, s_device); @@ -207,7 +207,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/depth_to_space.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -224,7 +224,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_v1) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/depth_to_space_v1.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -242,7 +242,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_crd) { "onnx/depth_to_space_crd.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 4.f, 1.f, 5.f, 8.f, 12.f, 9.f, 13.f, 2.f, 6.f, 3.f, 7.f, 10.f, 14.f, 11.f, 15.f, 16.f, 20.f, 17.f, 21.f, 24.f, 28.f, @@ -304,7 +304,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_space_to_depth) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/space_to_depth.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{ 0.f, 2.f, 8.f, 10.f, 16.f, 18.f, 24.f, 26.f, 1.f, 3.f, 9.f, 11.f, 17.f, 19.f, 25.f, 27.f, diff --git 
a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp index fbe25a2b0b0227..06b28aa76a88e6 100644 --- a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp @@ -39,12 +39,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_default_const) { "onnx/lstm_fwd_default_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.063373, -0.20347191, -0.07230289, -0.13298286}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.07230289, -0.13298286}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.1557954, -0.24502525}); // Y_c_data + {-0.063373f, -0.20347191f, -0.07230289f, -0.13298286f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.07230289f, -0.13298286f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.1557954f, -0.24502525f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -55,12 +55,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_reverse_const) { "onnx/lstm_reverse_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.06082131, -0.19985214, 0.00860566, 0.00920492}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.06082131, -0.19985214}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.25917438, -0.3832652}); // Y_c_data + {-0.06082131f, -0.19985214f, 0.00860566f, 0.00920492f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.06082131f, -0.19985214f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.25917438f, -0.3832652f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -70,21 +70,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_const) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/lstm_bidir_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 2, 1, 2}, - {-0.063373, - -0.20347191, - -0.06082131, - -0.19985214, - -0.07230289, - -0.13298286, - 0.00860566, - 0.00920492}); // Y_data + {-0.063373f, + -0.20347191f, + -0.06082131f, + -0.19985214f, + -0.07230289f, + -0.13298286f, + 0.00860566f, + 0.00920492f}); // Y_data test_case.add_expected_output(Shape{2, 1, 2}, - {-0.07230289, -0.13298286, -0.06082131, -0.19985214}); // Y_h_data + {-0.07230289f, -0.13298286f, -0.06082131f, -0.19985214f}); // Y_h_data test_case.add_expected_output(Shape{2, 1, 2}, - {-0.1557954, -0.24502525, -0.25917438, -0.3832652}); // Y_c_data + {-0.1557954f, -0.24502525f, -0.25917438f, -0.3832652f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -95,12 +95,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_const) { "onnx/lstm_fwd_clip_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, 
-0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.02391884, -0.02744377, -0.01024176, -0.01188637}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.01024176, -0.01188637}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.02039271, -0.02353566}); // Y_c_data + {-0.02391884f, -0.02744377f, -0.01024176f, -0.01188637f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.01024176f, -0.01188637f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.02039271f, -0.02353566f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -111,27 +111,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_mixed_seq_const) { "onnx/lstm_fwd_mixed_seq_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 2, 3}, - {0.13528088, - -0.1779867, - -0.07448981, - 0.14769037, - -0.16327181, - -0.10419653, + {0.13528088f, + -0.1779867f, + -0.07448981f, + 0.14769037f, + -0.16327181f, + -0.10419653f, 0., 0., 0., - 0.08759661, - -0.04002844, - -0.08617793}); // Y_data + 0.08759661f, + -0.04002844f, + -0.08617793f}); // Y_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.13528088, -0.1779867, -0.07448981, 0.08759661, -0.04002844, -0.08617793}); // Y_h_data + {0.13528088f, -0.1779867f, -0.07448981f, 0.08759661f, -0.04002844f, -0.08617793f}); // Y_h_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.367563, -0.43762812, -0.20435227, 0.17330585, -0.0732716, -0.18809439}); // Y_c_data + {0.367563f, -0.43762812f, -0.20435227f, 0.17330585f, -0.0732716f, -0.18809439f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -142,27 +142,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_reverse_mixed_seq_const) { "onnx/lstm_reverse_mixed_seq_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 2, 3}, - {0.13528088, - -0.1779867, - -0.07448981, - 0.14696799, - -0.15571019, - -0.10270946, + {0.13528088f, + -0.1779867f, + -0.07448981f, + 0.14696799f, + -0.15571019f, + -0.10270946f, 0., 0., 0., - -0.01110403, - 0.0228607, - 0.00397353}); // Y_data + -0.01110403f, + 0.0228607f, + 0.00397353f}); // Y_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.13528088, -0.1779867, -0.07448981, 0.14696799, -0.15571019, -0.10270946}); // Y_h_data + {0.13528088f, -0.1779867f, -0.07448981f, 0.14696799f, -0.15571019f, -0.10270946f}); // Y_h_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.367563, -0.43762812, -0.20435227, 0.50598085, -0.42627674, -0.3641275}); // Y_c_data + {0.367563f, -0.43762812f, -0.20435227f, 0.50598085f, -0.42627674f, -0.3641275f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -174,43 +174,43 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_mixed_seq_const) { auto test_case = test::TestCase(function, s_device); test_case.add_input( - {0.68172926, 1.1405563, -0.03931177, -0.03759607, 1.1397027, 0.60444903, 1.3246384, -0.28191715}); // X + {0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f, 1.1397027f, 0.60444903f, 1.3246384f, -0.28191715f}); // X test_case.add_expected_output(Shape{2, 2, 2, 2}, - {-0.063373, - -0.20347191, - 
0.00860566, - 0.00920492, - -0.063373, - -0.20347191, - -0.12004475, - -0.12800421, + {-0.063373f, + -0.20347191f, + 0.00860566f, + 0.00920492f, + -0.063373f, + -0.20347191f, + -0.12004475f, + -0.12800421f, 0., 0., - -0.19095606, - -0.12459831, + -0.19095606f, + -0.12459831f, 0., 0., - -0.1911628, - -0.12813942}); // Y_data + -0.1911628f, + -0.12813942f}); // Y_data test_case.add_expected_output(Shape{2, 2, 2}, - {-0.063373, - -0.20347191, - -0.19095606, - -0.12459831, - -0.063373, - -0.20347191, - -0.12004475, - -0.12800421}); // Y_h_data + {-0.063373f, + -0.20347191f, + -0.19095606f, + -0.12459831f, + -0.063373f, + -0.20347191f, + -0.12004475f, + -0.12800421f}); // Y_h_data test_case.add_expected_output(Shape{2, 2, 2}, - {-0.2732999, - -0.38956356, - -0.48170844, - -0.34701264, - -0.2732999, - -0.38956356, - -0.27130172, - -0.253659}); // Y_c_data + {-0.2732999f, + -0.38956356f, + -0.48170844f, + -0.34701264f, + -0.2732999f, + -0.38956356f, + -0.27130172f, + -0.253659f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -221,8 +221,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_peepholes) { "onnx/lstm_fwd_with_clip_peepholes.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({-0.455351, -0.276391, -0.185934, -0.269585}); // X - test_case.add_input({-0.494659f, // W + test_case.add_input({-0.455351f, -0.276391f, -0.185934f, -0.269585f}); // X + test_case.add_input({-0.494659f, // W 0.0453352f, -0.487793f, 0.417264f, @@ -560,10 +560,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_dynamic_batch_size_and_seq_len) { test_case.add_input({1, 2, 3, 4, 5, 6}); test_case.add_expected_output(Shape{1, 1, 3, 2}, - {0.761594, 0.761594, 0.761594, 0.761594, 0.761594, 0.761594}); // Y + {0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f}); // Y test_case.add_expected_output(Shape{1, 3, 2}, - {0.761594, 0.761594, 0.761594, 0.761594, 0.761594, 0.761594}); // Y_c - test_case.add_expected_output(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); // Y_h + {0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f}); // Y_c + test_case.add_expected_output(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); // Y_h test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -769,32 +769,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_fwd_activations_con // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{0.30736187, 0.10271017, 0.91698503, 0.3471303, -0.0123809, 0.51264125, 0.51235366, - 0.45471948, 0.50601995, 0.49260828, 0.4781971, 0.0668709, 0.89421916, 0.33762455, - -0.19021586, 0.6881336, 0.7331965, 0.8887774, 0.34048334, 0.38408905, 0.49962956, - 0.2948451, 0.3651103, 0.33406913, 0.57418096, 0.49882296, 0.4321446, 0.97142136, - 0.20714557, 0.66270787, 0.53192705, 0.46424377, 0.9647801, 0.19583187, 0.7362316, - 0.48205143, -0.04748845, 0.27395952, 0.35897565, 0.5801568, 0.5889811, 0.36110958, - 1.3433081, 0.29702073, 0.5709667, 0.936689, 0.84129435, 1.1782551, 0.23925206, - 0.57521456, 0.43502977, -0.5664091, 0.6758457, 0.2958132, 0.70932186, 0.4411352, - -0.1717428, 1.7761463, 0.14413449, 0.73801273}); + std::vector{0.30736187f, 0.10271017f, 0.91698503f, 0.3471303f, -0.0123809f, 0.51264125f, 0.51235366f, + 0.45471948f, 0.50601995f, 0.49260828f, 0.4781971f, 0.0668709f, 0.89421916f, 0.33762455f, + -0.19021586f, 0.6881336f, 0.7331965f, 0.8887774f, 0.34048334f, 0.38408905f, 0.49962956f, + 0.2948451f, 0.3651103f, 0.33406913f, 0.57418096f, 0.49882296f, 0.4321446f, 0.97142136f, + 0.20714557f, 0.66270787f, 0.53192705f, 
0.46424377f, 0.9647801f, 0.19583187f, 0.7362316f, + 0.48205143f, -0.04748845f, 0.27395952f, 0.35897565f, 0.5801568f, 0.5889811f, 0.36110958f, + 1.3433081f, 0.29702073f, 0.5709667f, 0.936689f, 0.84129435f, 1.1782551f, 0.23925206f, + 0.57521456f, 0.43502977f, -0.5664091f, 0.6758457f, 0.2958132f, 0.70932186f, 0.4411352f, + -0.1717428f, 1.7761463f, 0.14413449f, 0.73801273f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{0.936689, - 0.84129435, - 1.1782551, - 0.23925206, - 0.57521456, - 0.43502977, - -0.5664091, - 0.6758457, - 0.2958132, - 0.70932186, - 0.4411352, - -0.1717428, - 1.7761463, - 0.14413449, - 0.73801273}); + std::vector{0.936689f, + 0.84129435f, + 1.1782551f, + 0.23925206f, + 0.57521456f, + 0.43502977f, + -0.5664091f, + 0.6758457f, + 0.2958132f, + 0.70932186f, + 0.4411352f, + -0.1717428f, + 1.7761463f, + 0.14413449f, + 0.73801273f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 5); } @@ -908,32 +908,33 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_fwd_mixed_seq_len_c // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.9559332, 0.4372494, 0.9967716, -0.9079381, -1.2538278, 1.9265908, -0.8437393, - -1.2057271, -0.25887525, -0.52679026, -0.3619178, 0.67928517, 0.9486744, -0.12006134, - -1.3862017, -0.98941356, 0.80389524, 0.97586197, -0.9343586, -0.74858856, 1.797039, - -0.7873732, -0.72469383, -0.5866635, -0.42103744, -0.8406298, 0.85877097, 0.6349921, - -0.55897295, -0.6168443, 0., 0., 0., 0., 0., - 1.577129, -0.6935871, -0.304804, -0.75392795, -0.20703818, -0.93796504, 0.9220495, - 0.36017662, -0.7007159, 0.06962098, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., -0.96323603, - 0.9265786, 0.54976916, -0.8037839, 0.73501444}); + std::vector{-0.9559332f, 0.4372494f, 0.9967716f, -0.9079381f, -1.2538278f, 1.9265908f, + -0.8437393f, -1.2057271f, -0.25887525f, -0.52679026f, -0.3619178f, 0.67928517f, + 0.9486744f, -0.12006134f, -1.3862017f, -0.98941356f, 0.80389524f, 0.97586197f, + -0.9343586f, -0.74858856f, 1.797039f, -0.7873732f, -0.72469383f, -0.5866635f, + -0.42103744f, -0.8406298f, 0.85877097f, 0.6349921f, -0.55897295f, -0.6168443f, + 0., 0., 0., 0., 0., 1.577129f, + -0.6935871f, -0.304804f, -0.75392795f, -0.20703818f, -0.93796504f, 0.9220495f, + 0.36017662f, -0.7007159f, 0.06962098f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., + 0., -0.96323603f, 0.9265786f, 0.54976916f, -0.8037839f, 0.73501444f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.98941356, - 0.80389524, - 0.97586197, - -0.9343586, - -0.74858856, - 1.577129, - -0.6935871, - -0.304804, - -0.75392795, - -0.20703818, - -0.96323603, - 0.9265786, - 0.54976916, - -0.8037839, - 0.73501444}); + std::vector{-0.98941356f, + 0.80389524f, + 0.97586197f, + -0.9343586f, + -0.74858856f, + 1.577129f, + -0.6935871f, + -0.304804f, + -0.75392795f, + -0.20703818f, + -0.96323603f, + 0.9265786f, + 0.54976916f, + -0.8037839f, + 0.73501444f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); } @@ -949,32 +950,33 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_reverse_mixed_seq_l // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.9917215, 0.07583051, 0.997975, -0.9315585, -0.7483002, 1.536813, -0.59922504, - -0.33637103, -0.7565539, -0.23930266, -0.7844553, 1.0393485, 0.73516595, -0.5616293, - -0.09489207, -0.9501128, 0.7905356, 0.9928266, -0.9153729, -1.1781745, 1.7955453, - -0.77754307, -0.6831806, -0.6266324, -0.39791372, -0.8030517, 1.3107346, 0.3700709, - -0.49808976, 0.52939236, 0., 0., 0., 0., 0., - 1.9345565, -0.83817405, 
-1.1433047, -0.35640514, -0.5191339, -0.655544, 1.3520991, - 0.42289692, -0.3171452, -0.3922639, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., -0.24612205, - 1.6415757, 0.79883975, -0.18640287, -1.0134869}); + std::vector{-0.9917215f, 0.07583051f, 0.997975f, -0.9315585f, -0.7483002f, 1.536813f, + -0.59922504f, -0.33637103f, -0.7565539f, -0.23930266f, -0.7844553f, 1.0393485f, + 0.73516595f, -0.5616293f, -0.09489207f, -0.9501128f, 0.7905356f, 0.9928266f, + -0.9153729f, -1.1781745f, 1.7955453f, -0.77754307f, -0.6831806f, -0.6266324f, + -0.39791372f, -0.8030517f, 1.3107346f, 0.3700709f, -0.49808976f, 0.52939236f, + 0., 0., 0., 0., 0., 1.9345565f, + -0.83817405f, -1.1433047f, -0.35640514f, -0.5191339f, -0.655544f, 1.3520991f, + 0.42289692f, -0.3171452f, -0.3922639f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., + 0., -0.24612205f, 1.6415757f, 0.79883975f, -0.18640287f, -1.0134869f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.9917215, - 0.07583051, - 0.997975, - -0.9315585, - -0.7483002, - 1.536813, - -0.59922504, - -0.33637103, - -0.7565539, - -0.23930266, - -0.7844553, - 1.0393485, - 0.73516595, - -0.5616293, - -0.09489207}); + std::vector{-0.9917215f, + 0.07583051f, + 0.997975f, + -0.9315585f, + -0.7483002f, + 1.536813f, + -0.59922504f, + -0.33637103f, + -0.7565539f, + -0.23930266f, + -0.7844553f, + 1.0393485f, + 0.73516595f, + -0.5616293f, + -0.09489207f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); } @@ -991,29 +993,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_bidir_mixed_seq_len test_case.add_expected_output( Shape{4, 2, 3, 5}, std::vector{ - -0.3224981, -0.44282594, 0.7499796, -0.12240417, 0.12079421, 0.02534254, 0.02504561, -0.0463777, - 0.01204535, -0.01497037, -0.04651929, -0.6264307, 0.7236632, 0.06250653, 0.02594197, 0.0595789, - 0.40258542, -0.40646964, 0.70320284, -0.02962421, 0.10372428, -0.38378227, -0.4331268, -0.15696645, - -0.3451503, 0.20918667, -0.59024405, -0.845524, 0.60705113, -0.6336088, -0.0833023, -0.40062034, - 0.7579466, -0.12340625, 0.04415433, -0.24662054, 0.27420586, -0.09122991, -0.22768986, 0.19980887, - -0.218649, -0.5560231, 0.56177044, -0.25098884, 0.15462328, 0.0409361, 0.17866893, -0.2782218, - 0.27396634, -0.04992082, 0.15353821, -0.4497267, -0.44631857, -0.478926, -0.23017275, 0.25369287, - -0.7369056, -0.73285, -0.5750758, -0.533177, 0., 0., 0., 0., - 0., -0.45753813, 0.5987347, -0.07046632, -0.35819566, 0.3916747, -0.18096107, -0.24415034, - 0.38435352, -0.29881003, 0.07738188, 0., 0., 0., 0., 0., - 0.10390212, -0.29646862, -0.20532897, -0.31521815, 0.01049522, 0.19370168, -0.6386781, -0.42919028, - -0.47081998, -0.2954276, 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., -0.50112087, -0.11085765, 0.5155622, -0.5635352, - 0.54762024, 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0.17058733, -0.6941011, -0.27862304, -0.27050856, -0.03864266}); + -0.3224981f, -0.44282594f, 0.7499796f, -0.12240417f, 0.12079421f, 0.02534254f, 0.02504561f, + -0.0463777f, 0.01204535f, -0.01497037f, -0.04651929f, -0.6264307f, 0.7236632f, 0.06250653f, + 0.02594197f, 0.0595789f, 0.40258542f, -0.40646964f, 0.70320284f, -0.02962421f, 0.10372428f, + -0.38378227f, -0.4331268f, -0.15696645f, -0.3451503f, 0.20918667f, -0.59024405f, -0.845524f, + 0.60705113f, -0.6336088f, -0.0833023f, -0.40062034f, 0.7579466f, -0.12340625f, 0.04415433f, + -0.24662054f, 0.27420586f, -0.09122991f, -0.22768986f, 0.19980887f, -0.218649f, -0.5560231f, + 0.56177044f, -0.25098884f, 0.15462328f, 0.0409361f, 0.17866893f, -0.2782218f, 0.27396634f, + -0.04992082f, 0.15353821f, 
-0.4497267f, -0.44631857f, -0.478926f, -0.23017275f, 0.25369287f, + -0.7369056f, -0.73285f, -0.5750758f, -0.533177f, 0., 0., 0., + 0., 0., -0.45753813f, 0.5987347f, -0.07046632f, -0.35819566f, 0.3916747f, + -0.18096107f, -0.24415034f, 0.38435352f, -0.29881003f, 0.07738188f, 0., 0., + 0., 0., 0., 0.10390212f, -0.29646862f, -0.20532897f, -0.31521815f, + 0.01049522f, 0.19370168f, -0.6386781f, -0.42919028f, -0.47081998f, -0.2954276f, 0., + 0., 0., 0., 0., 0., 0., 0., + 0., 0., -0.50112087f, -0.11085765f, 0.5155622f, -0.5635352f, 0.54762024f, + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0.17058733f, -0.6941011f, -0.27862304f, -0.27050856f, + -0.03864266f}); // Y_h test_case.add_expected_output( Shape{2, 3, 5}, - std::vector{-0.0833023, -0.40062034, 0.7579466, -0.12340625, 0.04415433, -0.45753813, - 0.5987347, -0.07046632, -0.35819566, 0.3916747, -0.50112087, -0.11085765, - 0.5155622, -0.5635352, 0.54762024, 0.0595789, 0.40258542, -0.40646964, - 0.70320284, -0.02962421, 0.10372428, -0.38378227, -0.4331268, -0.15696645, - -0.3451503, 0.20918667, -0.59024405, -0.845524, 0.60705113, -0.6336088}); + std::vector{-0.0833023f, -0.40062034f, 0.7579466f, -0.12340625f, 0.04415433f, -0.45753813f, + 0.5987347f, -0.07046632f, -0.35819566f, 0.3916747f, -0.50112087f, -0.11085765f, + 0.5155622f, -0.5635352f, 0.54762024f, 0.0595789f, 0.40258542f, -0.40646964f, + 0.70320284f, -0.02962421f, 0.10372428f, -0.38378227f, -0.4331268f, -0.15696645f, + -0.3451503f, 0.20918667f, -0.59024405f, -0.845524f, 0.60705113f, -0.6336088f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4); } @@ -1901,32 +1906,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_reverse_mixed_seq_l // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.27398264, 0.96948624, 0.26404798, 0.8068119, 0.99935544, 0.73694086, 0.44305325, - -0.9964632, 0.7063714, 0.9999049, -0.7241098, 0.08538079, -0.785874, 0.60833323, - 0.99999666, 0.53703666, 0.0267657, 0.37151086, -0.68740594, 0.9992448, 0.3254757, - 0.7716811, -0.9996745, 0.9957807, 0.9995338, 0.9997339, 0.9888724, -0.8992324, - -0.797282, 0.98666525, 0., 0., 0., 0., 0., - 0.95711637, -0.8986079, -0.99998885, 0.96265936, 0.9380511, -0.86523867, 0.3528558, - -0.99675506, 0.946875, 0.79539406, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0.99903, - 0.9998094, 0.9499353, 0.6077225, -0.9921822}); + std::vector{ + -0.27398264f, 0.96948624f, 0.26404798f, 0.8068119f, 0.99935544f, 0.73694086f, 0.44305325f, -0.9964632f, + 0.7063714f, 0.9999049f, -0.7241098f, 0.08538079f, -0.785874f, 0.60833323f, 0.99999666f, 0.53703666f, + 0.0267657f, 0.37151086f, -0.68740594f, 0.9992448f, 0.3254757f, 0.7716811f, -0.9996745f, 0.9957807f, + 0.9995338f, 0.9997339f, 0.9888724f, -0.8992324f, -0.797282f, 0.98666525f, 0., 0., + 0., 0., 0., 0.95711637f, -0.8986079f, -0.99998885f, 0.96265936f, 0.9380511f, + -0.86523867f, 0.3528558f, -0.99675506f, 0.946875f, 0.79539406f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0.99903f, + 0.9998094f, 0.9499353f, 0.6077225f, -0.9921822f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.27398264, - 0.96948624, - 0.26404798, - 0.8068119, - 0.99935544, - 0.73694086, - 0.44305325, - -0.9964632, - 0.7063714, - 0.9999049, - -0.7241098, - 0.08538079, - -0.785874, - 0.60833323, - 0.99999666}); + std::vector{-0.27398264f, + 0.96948624f, + 0.26404798f, + 0.8068119f, + 0.99935544f, + 0.73694086f, + 0.44305325f, + -0.9964632f, + 0.7063714f, + 0.9999049f, + -0.7241098f, + 0.08538079f, + -0.785874f, + 0.60833323f, + 0.99999666f}); 
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4); } @@ -1942,29 +1947,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_bidir_mixed_seq_len test_case.add_expected_output( Shape{4, 2, 3, 5}, std::vector{ - 0.02254748, 0.15776646, -0.8229023, 0.19205809, 0.76984656, -0.00603169, -0.0286147, 0.04512155, - -0.0011912, -0.02572936, -0.13703543, -0.49651444, -0.78868157, 0.3566854, 0.8758509, -0.99602485, - -0.8151508, -0.5803147, 0.4985683, 0.30210292, 0.11550081, -0.30236644, 0.99622667, -0.8732492, - -0.43772405, -0.9284624, -0.5595875, 0.9986867, -0.18373811, 0.8451735, -0.43823165, -0.1904698, - 0.8320786, 0.9830735, 0.61861455, 0.19109797, 0.64407, 0.00962067, -0.32752877, -0.5050589, - -0.23455954, 0.9517933, 0.9050665, 0.91091585, -0.77941567, -0.71390504, -0.24422187, -0.38115412, - 0.3462553, 0.44084883, -0.81455964, -0.23556596, 0.85043025, -0.7840209, -0.82087713, -0.8349008, - -0.7880142, 0.99017143, -0.9816452, -0.93827677, 0., 0., 0., 0., - 0., 0.28117967, 0.20685148, 0.01166701, -0.5441828, -0.5463747, -0.85301256, 0.52109087, - -0.8317892, -0.9676957, -0.30258918, 0., 0., 0., 0., 0., - -0.7010546, -0.3106169, -0.04788882, -0.21822351, -0.33518708, -0.9073148, 0.16276085, 0.9518349, - -0.8635942, -0.92539954, 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0.9948462, -0.6242633, -0.19065344, -0.36072153, - -0.99407107, 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., -0.9957684, -0.7924, -0.40261805, -0.34061068, -0.55580306}); + 0.02254748f, 0.15776646f, -0.8229023f, 0.19205809f, 0.76984656f, -0.00603169f, -0.0286147f, + 0.04512155f, -0.0011912f, -0.02572936f, -0.13703543f, -0.49651444f, -0.78868157f, 0.3566854f, + 0.8758509f, -0.99602485f, -0.8151508f, -0.5803147f, 0.4985683f, 0.30210292f, 0.11550081f, + -0.30236644f, 0.99622667f, -0.8732492f, -0.43772405f, -0.9284624f, -0.5595875f, 0.9986867f, + -0.18373811f, 0.8451735f, -0.43823165f, -0.1904698f, 0.8320786f, 0.9830735f, 0.61861455f, + 0.19109797f, 0.64407f, 0.00962067f, -0.32752877f, -0.5050589f, -0.23455954f, 0.9517933f, + 0.9050665f, 0.91091585f, -0.77941567f, -0.71390504f, -0.24422187f, -0.38115412f, 0.3462553f, + 0.44084883f, -0.81455964f, -0.23556596f, 0.85043025f, -0.7840209f, -0.82087713f, -0.8349008f, + -0.7880142f, 0.99017143f, -0.9816452f, -0.93827677f, 0., 0., 0., + 0., 0., 0.28117967f, 0.20685148f, 0.01166701f, -0.5441828f, -0.5463747f, + -0.85301256f, 0.52109087f, -0.8317892f, -0.9676957f, -0.30258918f, 0., 0., + 0., 0., 0., -0.7010546f, -0.3106169f, -0.04788882f, -0.21822351f, + -0.33518708f, -0.9073148f, 0.16276085f, 0.9518349f, -0.8635942f, -0.92539954f, 0., + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0.9948462f, -0.6242633f, -0.19065344f, -0.36072153f, -0.99407107f, + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., -0.9957684f, -0.7924f, -0.40261805f, -0.34061068f, + -0.55580306f}); // Y_h test_case.add_expected_output( Shape{2, 3, 5}, - std::vector{-0.43823165, -0.1904698, 0.8320786, 0.9830735, 0.61861455, 0.28117967, - 0.20685148, 0.01166701, -0.5441828, -0.5463747, 0.9948462, -0.6242633, - -0.19065344, -0.36072153, -0.99407107, -0.99602485, -0.8151508, -0.5803147, - 0.4985683, 0.30210292, 0.11550081, -0.30236644, 0.99622667, -0.8732492, - -0.43772405, -0.9284624, -0.5595875, 0.9986867, -0.18373811, 0.8451735}); + std::vector{-0.43823165f, -0.1904698f, 0.8320786f, 0.9830735f, 0.61861455f, 0.28117967f, + 0.20685148f, 0.01166701f, -0.5441828f, -0.5463747f, 0.9948462f, -0.6242633f, + -0.19065344f, -0.36072153f, -0.99407107f, -0.99602485f, -0.8151508f, -0.5803147f, + 0.4985683f, 0.30210292f, 0.11550081f, -0.30236644f, 
0.99622667f, -0.8732492f, + -0.43772405f, -0.9284624f, -0.5595875f, 0.9986867f, -0.18373811f, 0.8451735f}); // loosest match @ mantissa bit: // 16 or next bit (0.01166688557714223862 vs 0.01166701037436723709) diff --git a/src/frontends/onnx/tests/onnx_transformations.cpp b/src/frontends/onnx/tests/onnx_transformations.cpp index 76420f41e314c3..ca4535c921ff76 100644 --- a/src/frontends/onnx/tests/onnx_transformations.cpp +++ b/src/frontends/onnx/tests/onnx_transformations.cpp @@ -42,7 +42,7 @@ bool after_func_expand_name_comp(std::string lhs, std::string rhs) { if (is_hex_symbol(name[i])) { ++founded_hex; if (cut_begin == -1) { - cut_begin = i; + cut_begin = static_cast<int>(i); } if (founded_hex >= min_address) { cut_length = founded_hex; From 9d0749a5b7f16121bc80059e760e342572abc2ca Mon Sep 17 00:00:00 2001 From: Sofya Balandina Date: Thu, 23 Mar 2023 10:59:31 +0000 Subject: [PATCH 056/296] [conformanceTests] Add key to manage pipeline after crashes (#16123) * [conformanceTests] Add key to manage pipeline after crashes * Move crash_handler to funcTestsUtils --- .../plugin/conformance/test_runner/README.md | 1 + .../conformance_infra/include/gflag_config.hpp | 5 +++++ .../test_runner/conformance_infra/src/main.cpp | 3 ++- .../src/read_ir_test/read_ir.cpp | 2 +- .../include/base/ov_behavior_test_utils.hpp | 2 +- .../shared/include/behavior/plugin/life_time.hpp | 2 +- .../op_impl_check/op_impl_check_compile_model.hpp | 2 +- .../op_impl_check/op_impl_check_query_model.hpp | 2 +- .../op_impl_check/op_impl_check.cpp | 2 +- .../shared_test_classes/base/layer_test_utils.hpp | 2 +- .../shared_test_classes/src/base/ov_subgraph.cpp | 2 +- .../functional_test_utils}/crash_handler.hpp | 4 +++- .../src}/crash_handler.cpp | 15 ++++++++++++++- 13 files changed, 33 insertions(+), 11 deletions(-) rename src/tests/ie_test_utils/{common_test_utils => functional_test_utils/include/functional_test_utils}/crash_handler.hpp (78%) rename src/tests/ie_test_utils/{common_test_utils => functional_test_utils/src}/crash_handler.cpp (83%) diff --git a/src/tests/functional/plugin/conformance/test_runner/README.md b/src/tests/functional/plugin/conformance/test_runner/README.md index 4c67da79667285..628567d23aab41 100644 --- a/src/tests/functional/plugin/conformance/test_runner/README.md +++ b/src/tests/functional/plugin/conformance/test_runner/README.md @@ -129,6 +129,7 @@ The target is able to take the following command-line arguments: * `--shape_mode` is optional. It allows you to run `static`, `dynamic` , or both scenarios. The default value is an empty string, which allows running both scenarios. Possible values are `static`, `dynamic`, `` * `--test_timeout` specifies setup timeout for each test in seconds. The default timeout is 900 seconds (15 minutes). +* `--ignore_crash` is optional. It allows the run to continue from the next test instead of terminating the whole run after a crash. This is organized with a custom crash handler. Note that the handler works only for the test body; if a crash happens at the SetUp/TearDown stage, the process is still terminated (see the example below).
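+ For example, a crash-tolerant run might look like this (the binary name and the other flag values here are illustrative, not taken from this patch): `./conformanceTests --device=CPU --shape_mode=static --test_timeout=600 --ignore_crash`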
 * All `gtest` command-line parameters
 
 > **NOTE**:
diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp
index 5ce146ba16b698..04c5dd2e28b2b2 100644
--- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp
+++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp
@@ -46,6 +46,9 @@ static const char extract_body_message[] = "Optional. Allows to count extracted
 static const char shape_mode_message[] = "Optional. Allows to run `static`, `dynamic` or both scenarios. Default value is empty string allows to run both"
                                          " scenarios. Possible values are `static`, `dynamic`, ``";
 static const char test_timeout_message[] = "Optional. Setup timeout for each test in seconds, default timeout 900seconds (15 minutes).";
+static const char ignore_crash_message[] = "Optional. Allows the run to continue from the next test instead of terminating after a crash."
+                                           " This is implemented with a custom crash handler. Please note that the handler only covers the test body;"
+                                           " if a crash happens at the SetUp/TearDown stage, the process will be terminated.";
 
 static const char reference_cache_dir_message[] = "Optional. Set the directory with reference cache";
 
@@ -63,6 +66,7 @@ DEFINE_bool(report_unique_name, false, report_unique_name_message);
 DEFINE_bool(extract_body, false, extract_body_message);
 DEFINE_string(shape_mode, "", shape_mode_message);
 DEFINE_uint32(test_timeout, UINT_MAX, test_timeout_message);
+DEFINE_bool(ignore_crash, false, ignore_crash_message);
 DEFINE_string(ref_dir, "", reference_cache_dir_message);
 
 /**
@@ -87,6 +91,7 @@ static void showUsage() {
     std::cout << "    --plugin_lib_name " << output_folder_message << std::endl;
     std::cout << "    --shape_mode  \"\" " << shape_mode_message << std::endl;
     std::cout << "    --test_timeout  \"\" " << test_timeout_message << std::endl;
+    std::cout << "    --ignore_crash " << ignore_crash_message << std::endl;
     std::cout << "    --ref_dir  \"\" " << reference_cache_dir_message << std::endl;
 }
 
diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp
index 8c3567cafa1a82..82ca2ec898afbf 100644
--- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp
+++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp
@@ -17,7 +17,7 @@
 #include "gflag_config.hpp"
 #include "conformance.hpp"
-#include "common_test_utils/crash_handler.hpp"
+#include "functional_test_utils/crash_handler.hpp"
 
 using namespace ov::test::conformance;
 
@@ -63,6 +63,7 @@ int main(int argc, char* argv[]) {
     }
 
     CommonTestUtils::CrashHandler::SetUpTimeout(FLAGS_test_timeout);
+    CommonTestUtils::CrashHandler::SetUpPipelineAfterCrash(FLAGS_ignore_crash);
 
     // ---------------------------Initialization of Gtest env -----------------------------------------------
     ov::test::conformance::targetDevice = FLAGS_device.c_str();
diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp
index 397dcb9e6762f8..246051b7f7e543 100644
--- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp
+++ 
b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp @@ -13,7 +13,7 @@ #include "common_test_utils/file_utils.hpp" #include "common_test_utils/data_utils.hpp" #include "common_test_utils/common_utils.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "functional_test_utils/summary/op_info.hpp" #include "functional_test_utils/skip_tests_config.hpp" diff --git a/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp b/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp index 73fc178a7209e8..010ec941e5db90 100644 --- a/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp +++ b/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp @@ -18,7 +18,7 @@ #include "common_test_utils/test_common.hpp" #include "common_test_utils/test_constants.hpp" #include "common_test_utils/common_utils.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "common_test_utils/file_utils.hpp" #include "functional_test_utils/plugin_cache.hpp" diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp index 6ef031d096a15a..010ed38672c643 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp @@ -15,7 +15,7 @@ #include #include #include "gtest/gtest.h" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "functional_test_utils/precision_utils.hpp" #include "base/behavior_test_utils.hpp" diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp index 9450bc62dc1065..76c839ecbab1ba 100644 --- a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "single_layer_tests/op_impl_check/op_impl_check.hpp" namespace ov { diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp index 5aa0df75dfaf48..a1c18f05f33521 100644 --- a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "single_layer_tests/op_impl_check/op_impl_check.hpp" namespace ov { diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp index 99fd2c24a55cac..2a6c23f2c004cb 
100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp @@ -7,7 +7,7 @@ #endif #include "single_layer_tests/op_impl_check/op_impl_check.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" namespace ov { namespace test { diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp index c2d415b04b893f..a4dda85d675d25 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp @@ -21,8 +21,8 @@ #include "common_test_utils/ngraph_test_utils.hpp" #include "common_test_utils/common_utils.hpp" #include "common_test_utils/test_common.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "functional_test_utils/plugin_cache.hpp" #include "functional_test_utils/blob_utils.hpp" diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index ed59d6e0d743b3..ee6c57ca694222 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -23,7 +23,7 @@ #include "ngraph_functions/utils/ngraph_helpers.hpp" #include "common_test_utils/file_utils.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "common_test_utils/ov_tensor_utils.hpp" #include "functional_test_utils/skip_tests_config.hpp" diff --git a/src/tests/ie_test_utils/common_test_utils/crash_handler.hpp b/src/tests/ie_test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp similarity index 78% rename from src/tests/ie_test_utils/common_test_utils/crash_handler.hpp rename to src/tests/ie_test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp index f06ede67c61993..75b2c47cff1d2c 100644 --- a/src/tests/ie_test_utils/common_test_utils/crash_handler.hpp +++ b/src/tests/ie_test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp @@ -6,7 +6,7 @@ #include -#include "common_utils.hpp" +#include "common_test_utils/common_utils.hpp" #include #include @@ -20,10 +20,12 @@ enum JMP_STATUS { ok = 0, anyError = 1, alarmErr = 2 }; class CrashHandler { private: static unsigned int MAX_TEST_WORK_TIME; + static bool IGNORE_CRASH; public: CrashHandler(); ~CrashHandler(); static void SetUpTimeout(unsigned int timeout); + static void SetUpPipelineAfterCrash(bool ignore_crash); void StartTimer(); }; diff --git a/src/tests/ie_test_utils/common_test_utils/crash_handler.cpp b/src/tests/ie_test_utils/functional_test_utils/src/crash_handler.cpp similarity index 83% rename from src/tests/ie_test_utils/common_test_utils/crash_handler.cpp rename to src/tests/ie_test_utils/functional_test_utils/src/crash_handler.cpp index 4372f75a4dc5b5..3134df4317578d 100644 --- a/src/tests/ie_test_utils/common_test_utils/crash_handler.cpp +++ b/src/tests/ie_test_utils/functional_test_utils/src/crash_handler.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include 
"crash_handler.hpp" +#include "functional_test_utils/summary/op_summary.hpp" + +#include "functional_test_utils/crash_handler.hpp" #include namespace CommonTestUtils { @@ -10,6 +12,7 @@ namespace CommonTestUtils { // enviroment to restore in case of crash jmp_buf env; unsigned int CrashHandler::MAX_TEST_WORK_TIME = UINT_MAX; +bool CrashHandler::IGNORE_CRASH = false; CrashHandler::CrashHandler() { // setup default value for timeout in 15 minutes @@ -31,6 +34,12 @@ CrashHandler::CrashHandler() { signal(SIGALRM, SIG_DFL); #endif + if (!CrashHandler::IGNORE_CRASH) { + auto &s = ov::test::utils::OpSummary::getInstance(); + s.saveReport(); + std::abort(); + } + #ifdef _WIN32 longjmp(env, JMP_STATUS::anyError); #else @@ -84,4 +93,8 @@ void CrashHandler::SetUpTimeout(unsigned int timeout) { MAX_TEST_WORK_TIME = timeout; } +void CrashHandler::SetUpPipelineAfterCrash(bool ignore_crash) { + IGNORE_CRASH = ignore_crash; +} + } // namespace CommonTestUtils \ No newline at end of file From c89da1aee2018dd28085dcbc4018e8a02d56cfc7 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 12:02:01 +0100 Subject: [PATCH 057/296] DOCS shift to rst - Install OpenVINO on macOS, Raspbian (#16506) --- .../installing-openvino-brew.md | 81 +++++------- .../installing-openvino-from-archive-macos.md | 123 +++++++++--------- .../installing-openvino-macos-header.md | 13 +- .../installing-openvino-macos.md | 12 +- .../installing-openvino-raspbian.md | 65 +++------ 5 files changed, 133 insertions(+), 161 deletions(-) diff --git a/docs/install_guides/installing-openvino-brew.md b/docs/install_guides/installing-openvino-brew.md index 557e38872e84c2..1bbf98042015a2 100644 --- a/docs/install_guides/installing-openvino-brew.md +++ b/docs/install_guides/installing-openvino-brew.md @@ -2,58 +2,52 @@ @sphinxdirective -With the OpenVINO™ 2022.3 release, you can install OpenVINO Runtime on macOS and Linux via `Homebrew `_. OpenVINO™ Development Tools can be installed via PyPI only. See :ref:`Installing Additional Components ` for more information. +With the OpenVINO™ 2022.3 release, you can install OpenVINO Runtime on macOS and Linux via `Homebrew `_. OpenVINO™ Development Tools can be installed via PyPI only. See `Installing Additional Components <#optional-installing-additional-components>`__ for more information. -See the `Release Notes `_ for more information on updates in the latest release. +See the `Release Notes `__ for more information on updates in the latest release. Installing OpenVINO Runtime from Homebrew is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -.. note:: +.. note:: Only CPU is supported for inference if you install OpenVINO via HomeBrew. -.. warning:: +.. warning:: - By downloading and using this container and the included software, you agree to the terms and conditions of the `software license agreements `_. + By downloading and using this container and the included software, you agree to the terms and conditions of the `software license agreements `__. 
-@endsphinxdirective - -## Prerequisites - -### System Requirements -@sphinxdirective - -Full requirement listing is available on the `System Requirements Page `_ +Prerequisites +#################### -@endsphinxdirective +System Requirements +++++++++++++++++++++ -### Software Requirements +Full requirement listing is available on the `System Requirements Page `__ -@sphinxdirective +Software Requirements ++++++++++++++++++++++ .. tab:: macOS * `Homebrew `_ - * `CMake 3.13 or higher `_ (choose "macOS 10.13 or later"). Add `/Applications/CMake.app/Contents/bin` to path (for default installation). - * `Python 3.7 - 3.10 `_ (choose 3.7 - 3.10). Install and add it to path. - * Apple Xcode Command Line Tools. In the terminal, run `xcode-select --install` from any directory to install it. + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default installation). + * `Python 3.7 - 3.10 `__ (choose 3.7 - 3.10). Install and add it to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory to install it. * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) .. tab:: Linux * `Homebrew `_ - * `CMake 3.13 or higher, 64-bit `_ + * `CMake 3.13 or higher, 64-bit `__ * GCC 7.5.0 (for Ubuntu 18.04) or GCC 9.3.0 (for Ubuntu 20.04) - * `Python 3.7 - 3.10, 64-bit `_ - -@endsphinxdirective + * `Python 3.7 - 3.10, 64-bit `__ -## Installing OpenVINO Runtime -@sphinxdirective +Installing OpenVINO Runtime +########################### -1. Make sure that you have installed HomeBrew on your system. If not, follow the instructions on `the Homebrew website `_ to install and configure it. +1. Make sure that you have installed HomeBrew on your system. If not, follow the instructions on `the Homebrew website `__ to install and configure it. 2. Open a command prompt terminal window, and run the following command to install OpenVINO Runtime: @@ -61,15 +55,11 @@ Full requirement listing is available on the `System Requirements Page `_. -@endsphinxdirective - -## Uninstalling OpenVINO +Uninstalling OpenVINO +##################### To uninstall OpenVINO via HomeBrew, use the following command: -```sh -brew uninstall openvino -``` -## What's Next? +.. code-block:: sh -@sphinxdirective + brew uninstall openvino + + +What's Next? +#################### -Now that you've installed OpenVINO Runtime, you can try the following things: +Now that you've installed OpenVINO Runtime, you can try the following things: * Learn more about :doc:`OpenVINO Workflow `. * To prepare your models for working with OpenVINO, see :doc:`Model Preparation `. @@ -99,8 +89,9 @@ Now that you've installed OpenVINO Runtime, you can try the following things: * See sample applications in :doc:`OpenVINO toolkit Samples Overview `. * Take a glance at the OpenVINO product home page: https://software.intel.com/en-us/openvino-toolkit. 
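Since a Homebrew installation supports CPU inference only, a quick smoke test can confirm that the freshly installed runtime loads and exposes the CPU device. The sketch below is illustrative only: the file name, build setup, and exact output are assumptions rather than part of the original guide.

```cpp
// smoke_test.cpp: illustrative sketch, not part of the official guide.
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;  // loads the runtime installed via Homebrew
    std::cout << ov::get_openvino_version() << std::endl;
    // A Homebrew build supports CPU inference only, so "CPU" is the device to expect.
    for (const auto& device : core.get_available_devices())
        std::cout << "Available device: " << device << std::endl;
    return 0;
}
```

Building it against the installed runtime, for example with CMake's `find_package(OpenVINO REQUIRED)` and the `openvino::runtime` target, and seeing `CPU` in the output confirms the installation.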
-@endsphinxdirective +Additional Resources +#################### -## Additional Resources +* `OpenVINO Installation Selector Tool `__ -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-from-archive-macos.md b/docs/install_guides/installing-openvino-from-archive-macos.md index 23d20d6e666130..2e3793bce92e3f 100644 --- a/docs/install_guides/installing-openvino-from-archive-macos.md +++ b/docs/install_guides/installing-openvino-from-archive-macos.md @@ -1,53 +1,59 @@ # Install OpenVINO™ Runtime on macOS from an Archive File {#openvino_docs_install_guides_installing_openvino_from_archive_macos} +@sphinxdirective + With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. -Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the [Install OpenVINO from PyPI](installing-openvino-pip.md) page for instructions on how to install OpenVINO Runtime for Python using PyPI. +Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes) for more information on updates in the latest release. +See the `Release Notes `__ for more information on updates in the latest release. -> **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via [pypi.org](https://pypi.org/project/openvino-dev/) only. +.. note:: + + Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via `pypi.org `__ only. -@sphinxdirective .. tab:: System Requirements | Full requirement listing is available in: - | `System Requirements Page `_ + | `System Requirements Page `__ .. tab:: Software Requirements - * `CMake 3.13 or higher `_ (choose "macOS 10.13 or later"). Add `/Applications/CMake.app/Contents/bin` to path (for default install). - * `Python 3.7 - 3.10 `_ (choose 3.7 - 3.10). Install and add to path. - * Apple Xcode Command Line Tools. In the terminal, run `xcode-select --install` from any directory + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default install). + * `Python 3.7 - 3.10 `__ (choose 3.7 - 3.10). Install and add to path. + * Apple Xcode Command Line Tools. 
In the terminal, run ``xcode-select --install`` from any directory * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) -@endsphinxdirective -## Installing OpenVINO Runtime +Installing OpenVINO Runtime +########################### -### Step 1: Install OpenVINO Core Components +Step 1: Install OpenVINO Core Components +++++++++++++++++++++++++++++++++++++++++ -@sphinxdirective -1. Open a command prompt terminal window. -2. Create the `/opt/intel` folder for OpenVINO by using the following command. If the folder already exists, skip this command. +1. Open a command prompt terminal window. +2. Create the ``/opt/intel`` folder for OpenVINO by using the following command. If the folder already exists, skip this command. .. code-block:: sh sudo mkdir /opt/intel - - .. note:: - - The `/opt/intel` path is the recommended folder path for installing OpenVINO. You may use a different path if desired. -3. Browse to the current user's `Downloads` folder: + + .. note:: + + The ``/opt/intel`` path is the recommended folder path for installing OpenVINO. You may use a different path if desired. + + +3. Browse to the current user's ``Downloads`` folder: .. code-block:: sh cd /Downloads - -4. Download the `OpenVINO Runtime archive file for macOS `_, extract the files, rename the extracted folder and move it to the desired path: + + +4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: .. tab:: x86, 64-bit @@ -65,55 +71,62 @@ See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNo tar -xf openvino_2022.3.0.tgz sudo mv m_openvino_toolkit_macos_11_0_2022.3.0.9052.9752fafe8eb_arm64 /opt/intel/openvino_2022.3.0 + 5. For simplicity, it is useful to create a symbolic link as below: .. code-block:: sh sudo ln -s openvino_2022.3.0 openvino_2022 - - .. note:: - - If you have already installed a previous release of OpenVINO 2022, a symbolic link to the `openvino_2022` folder may already exist. Unlink the previous link with `sudo unlink openvino_2022`, and then re-run the command above. -@endsphinxdirective + .. note:: + + If you have already installed a previous release of OpenVINO 2022, a symbolic link to the ``openvino_2022`` folder may already exist. Unlink the previous link with ``sudo unlink openvino_2022``, and then re-run the command above. + -Congratulations, you finished the installation! The `/opt/intel/openvino_2022` folder now contains the core components for OpenVINO. If you used a different path in Step 2, you will find the `openvino_2022` folder there. The path to the `openvino_2022` directory is also referred as `` throughout the OpenVINO documentation. +Congratulations, you finished the installation! The ``/opt/intel/openvino_2022`` folder now contains the core components for OpenVINO. If you used a different path in Step 2, you will find the ``openvino_2022`` folder there. The path to the ``openvino_2022`` directory is also referred as ```` throughout the OpenVINO documentation. -### Step 2: Configure the Environment +Step 2: Configure the Environment ++++++++++++++++++++++++++++++++++ -You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the `setupvars.sh` script as shown below to temporarily set your environment variables. If your is not `/opt/intel/openvino_2022`, use the correct one instead. 
+You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your ```` is not ``/opt/intel/openvino_2022``, use the correct one instead. -```sh -source /opt/intel/openvino_2022/setupvars.sh -``` +.. code-block:: sh -If you have more than one OpenVINO™ version on your machine, you can easily switch its version by sourcing the `setupvars.sh` of your choice. + source /opt/intel/openvino_2022/setupvars.sh -> **NOTE**: The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open `~/.zshrc` in your favorite editor and add `source /opt/intel/openvino_2022/setupvars.sh` after the last line. Next time when you open a terminal, you will see `[setupvars.sh] OpenVINO™ environment initialized`. Changing `~/.zshrc` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. + +If you have more than one OpenVINO™ version on your machine, you can easily switch its version by sourcing the ``setupvars.sh`` of your choice. + +.. note:: + + The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open ``~/.zshrc`` in your favorite editor and add ``source /opt/intel/openvino_2022/setupvars.sh`` after the last line. Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. Changing ``~/.zshrc`` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. The environment variables are set. Continue to the next section if you want to download any additional components. -### Step 3 (Optional): Install Additional Components +Step 3 (Optional): Install Additional Components +++++++++++++++++++++++++++++++++++++++++++++++++ OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. If you install OpenVINO Runtime using archive files, OpenVINO Development Tools must be installed separately. -See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page for step-by-step installation instructions. +See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. + +OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the `instructions on GitHub `__. -OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO). +What's Next? +#################### -## What's Next? Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. -@sphinxdirective + .. 
tab:: Get started with Python Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. - + .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 Visit the :ref:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: - + * `OpenVINO Python API Tutorial `_ * `Basic image classification program with Hello Image Classification `_ * `Convert a PyTorch model and use it for image background removal `_ @@ -121,44 +134,38 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. - + .. image:: https://user-images.githubusercontent.com/36741649/127170593-86976dc3-e5e4-40be-b0a6-206379cd7df5.jpg :width: 400 Visit the :ref:`Samples ` page for other C++ example applications to get you started with OpenVINO, such as: - + * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ -@endsphinxdirective - -## Uninstalling Intel® Distribution of OpenVINO™ Toolkit +Uninstalling Intel® Distribution of OpenVINO™ Toolkit +##################################################### -To uninstall the toolkit, follow the steps on the [Uninstalling page](uninstalling-openvino.md). +To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. -## Additional Resources - -@sphinxdirective +Additional Resources +#################### +* `OpenVINO Installation Selector Tool `__ * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer User Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ +* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit `__ -.. _Intel® IoT Developer Kit: https://github.com/intel-iot-devkit +---> @endsphinxdirective - -## Additional Resources - -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-macos-header.md b/docs/install_guides/installing-openvino-macos-header.md index 7054d0c6e0dbc3..69b1e93df437c3 100644 --- a/docs/install_guides/installing-openvino-macos-header.md +++ b/docs/install_guides/installing-openvino-macos-header.md @@ -10,12 +10,13 @@ From PyPI Using HomeBrew -@endsphinxdirective +If you want to install OpenVINO™ Runtime on macOS, there are a few ways to accomplish this. We prepared following options for you: -If you want to install OpenVINO™ Runtime on macOS, there are a few ways to accomplish this. 
We prepared following options for you: +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO Runtime via HomeBrew ` +* :doc:`Install OpenVINO from PyPI ` -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-macos.md) -* [Install OpenVINO Runtime via HomeBrew](installing-openvino-brew.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) +For a full selection of distribution channels, +see the `OpenVINO Installation Selector Tool `__ -For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) +@endsphinxdirective \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-macos.md b/docs/install_guides/installing-openvino-macos.md index 9a98f9335e1ded..21c5053e082821 100644 --- a/docs/install_guides/installing-openvino-macos.md +++ b/docs/install_guides/installing-openvino-macos.md @@ -1,12 +1,16 @@ # Install OpenVINO™ Runtime for macOS from Installer +@sphinxdirective + Currently only the following ways are provided to install OpenVINO™: -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-macos.md) -* [Install OpenVINO Runtime via HomeBrew](installing-openvino-brew.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) -* [Build From Source](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build.md) +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO Runtime via HomeBrew ` +* :doc:`Install OpenVINO from PyPI ` +* `Build From Source `__ The other installation methods are temporarily unavailable. For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) + +@endsphinxdirective \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-raspbian.md b/docs/install_guides/installing-openvino-raspbian.md index 8e87595aca6022..d47cea800092e6 100644 --- a/docs/install_guides/installing-openvino-raspbian.md +++ b/docs/install_guides/installing-openvino-raspbian.md @@ -8,12 +8,8 @@ * These steps have been validated with Raspberry Pi 3. * There is also an open-source version of OpenVINO™ that can be compiled for arch64 (see `build instructions `_). -@endsphinxdirective - - -## Development and Target Systems - -@sphinxdirective +Development and Target Systems +############################### .. tab:: System Requirements @@ -28,12 +24,8 @@ .. _install-openvino: -@endsphinxdirective - - -## Step 1: Download and Install OpenVINO Runtime - -@sphinxdirective +Step 1: Download and Install OpenVINO Runtime +############################################# #. Open the Terminal or your preferred console application. #. Create an installation folder for OpenVINO. If the folder already exists, skip this step. @@ -99,12 +91,8 @@ Congratulations, you finished the installation! The ``/opt/intel/openvino_2022`` .. _install-external-dependencies: -@endsphinxdirective - - -## Step 2: Install External Software Dependencies - -@sphinxdirective +Step 2: Install External Software Dependencies +############################################## CMake version 3.10 or higher is required for building the OpenVINO™ toolkit sample application. To install, open a Terminal window and run the following command: @@ -117,12 +105,8 @@ CMake is installed. 
Continue to the next section to set the environment variable .. _set-the-environment-variables-raspbian: -@endsphinxdirective - - -## Step 3: Set the Environment Variables - -@sphinxdirective +Step 3: Set the Environment Variables +##################################### You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your is not ``/opt/intel/openvino_2022``, use the correct one instead. @@ -141,12 +125,8 @@ The environment variables are set. Continue to the next section if you want to d .. _model-optimizer: -@endsphinxdirective - - -## Step 4 (Optional): Install Additional Components - -@sphinxdirective +Step 4 (Optional): Install Additional Components +################################################ If you want to use your model for inference, the model must be converted to the ``.bin`` and ``.xml`` Intermediate Representation (IR) files that are used as input by OpenVINO Runtime. To get the optimized models, you can use one of the following options: @@ -158,16 +138,11 @@ If you want to use your model for inference, the model must be converted to the * OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. -@endsphinxdirective - - -## What's Next? - -@sphinxdirective +What's Next? +#################### Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. - .. tab:: Get started with Python Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. @@ -181,6 +156,7 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine * `Basic image classification program with Hello Image Classification `_ * `Convert a PyTorch model and use it for image background removal `_ + .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. @@ -193,24 +169,17 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ - To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. -@endsphinxdirective - - -## Additional Resources - -@sphinxdirective +Additional Resources +#################### * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer User Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ +* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit `__ * :ref:`OpenVINO Installation Selector Tool ` -.. 
_Intel® IoT Developer Kit: https://github.com/intel-iot-devkit - -@endsphinxdirective \ No newline at end of file +@endsphinxdirective From 448654ea650c683eab7611fd416b36ef0558da16 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Thu, 23 Mar 2023 15:08:18 +0400 Subject: [PATCH 058/296] [CONFORMANCE] Fix report gewneration in case of mixed reports: rel and abs (#16505) --- .../functional_test_utils/layer_tests_summary/summarize.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py index 6abb27405217d8..8814de1d34285e 100644 --- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py @@ -101,6 +101,9 @@ def merge_xmls(xml_paths: list): continue xml_value = None if "relative_" in attr_name: + value = op_result.attrib.get(attr_name) + if value is None: + continue xml_value = float(op_result.attrib.get(attr_name)) else: xml_value = int(op_result.attrib.get(attr_name)) From 3b8d9c568c731e909e735a83cb0ec3a83611ee9d Mon Sep 17 00:00:00 2001 From: Nadezhda Ageeva Date: Thu, 23 Mar 2023 16:09:13 +0400 Subject: [PATCH 059/296] Allow skip LoadNetworkToDefaultDeviceNoThrow tests (#16507) --- .../plugin/shared/include/behavior/plugin/core_integration.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp index a519fc11e3f0ce..5a2bce5b9b6097 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp @@ -905,6 +905,7 @@ TEST_P(IEClassQueryNetworkTest, QueryNetworkHETEROWithBigDeviceIDThrows) { // TEST(IEClassBasicTest, smoke_LoadNetworkToDefaultDeviceNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() InferenceEngine::CNNNetwork actualCnnNetwork; std::shared_ptr actualNetwork = ngraph::builder::subgraph::makeSplitConvConcat(); ASSERT_NO_THROW(actualCnnNetwork = InferenceEngine::CNNNetwork(actualNetwork)); From 8a246a8bf20e18c100500de572c191a7e2fa6277 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 23 Mar 2023 13:25:39 +0100 Subject: [PATCH 060/296] [CPU] Use Dnnl executor to avoid extra dnnl primitve desc query (#16372) --- .../src/memory_desc/dnnl_memory_desc.cpp | 9 +++- .../src/memory_desc/dnnl_memory_desc.h | 1 + src/plugins/intel_cpu/src/node.cpp | 6 --- src/plugins/intel_cpu/src/node.h | 10 ++-- .../src/nodes/common/dnnl_executor.cpp | 39 +++++++-------- .../src/nodes/common/dnnl_executor.h | 40 +++++++++++++-- src/plugins/intel_cpu/src/nodes/concat.h | 1 + src/plugins/intel_cpu/src/nodes/conv.cpp | 19 +++---- src/plugins/intel_cpu/src/nodes/deconv.cpp | 23 ++++----- .../intel_cpu/src/nodes/fullyconnected.cpp | 49 +++++++------------ .../intel_cpu/src/nodes/fullyconnected.h | 10 ---- src/plugins/intel_cpu/src/nodes/input.h | 1 + src/plugins/intel_cpu/src/nodes/interaction.h | 1 + src/plugins/intel_cpu/src/nodes/lrn.cpp | 27 ++++++---- src/plugins/intel_cpu/src/nodes/lrn.h | 4 ++ src/plugins/intel_cpu/src/nodes/matmul.cpp | 22 ++++++--- src/plugins/intel_cpu/src/nodes/matmul.h | 4 ++ src/plugins/intel_cpu/src/nodes/pooling.cpp | 27 ++++++---- src/plugins/intel_cpu/src/nodes/pooling.h | 5 ++ src/plugins/intel_cpu/src/nodes/reorder.cpp | 6 ++- 
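The executor refactoring above repeats one pattern in every `prepareParams()`: build a key that captures everything the primitive depends on (memory descriptors, attributes, implementation type), then ask the parameter cache for an executor via `getOrCreate(key, builder)`, so the expensive oneDNN primitive construction runs only on a cache miss. The sketch below condenses that key-plus-builder idea; the `ParamsCache` template, its locking, and the boolean hit flag are simplified stand-ins for the real cache, which reports a `CacheEntryBase::LookUpStatus` instead.

```cpp
#include <functional>
#include <mutex>
#include <unordered_map>
#include <utility>

// Condensed sketch of the getOrCreate() pattern used by the nodes above.
// Key must describe everything the built Value depends on and be hashable.
template <typename Key, typename Value, typename Hash = std::hash<Key>>
class ParamsCache {
public:
    using Builder = std::function<Value(const Key&)>;

    // Returns the value and whether it came from the cache (hit) or the builder (miss).
    std::pair<Value, bool> getOrCreate(const Key& key, const Builder& builder) {
        std::lock_guard<std::mutex> guard(m_mutex);
        auto it = m_map.find(key);
        if (it != m_map.end())
            return {it->second, true};  // hit: reuse the previously built executor
        Value value = builder(key);     // miss: run the expensive builder once
        m_map.emplace(key, value);
        return {value, false};
    }

private:
    std::mutex m_mutex;
    std::unordered_map<Key, Value, Hash> m_map;
};
```

In the diffs above, `result.first` is the shared executor the node keeps in `execPtr`, and a null result (builder failure) is what triggers the "Primitive descriptor was not found" error paths.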
src/plugins/intel_cpu/src/nodes/reorder.h | 1 + src/plugins/intel_cpu/src/nodes/rnn.cpp | 19 ++++--- src/plugins/intel_cpu/src/nodes/rnn.h | 5 ++ src/plugins/intel_cpu/src/nodes/softmax.cpp | 26 ++++++---- src/plugins/intel_cpu/src/nodes/softmax.h | 5 ++ src/plugins/intel_cpu/src/nodes/transpose.h | 1 + 26 files changed, 205 insertions(+), 156 deletions(-) diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp index 1f2a17189a31cc..0458f93836779d 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp @@ -36,13 +36,18 @@ MemoryDescPtr DnnlMemoryDesc::cloneWithNewPrecision(const InferenceEngine::Preci } bool DnnlMemoryDesc::isCompatible(const MemoryDesc &rhs) const { - if (MemoryDescType::Dnnl == rhs.getType()) { - return this->desc == rhs.as()->desc; + if (MemoryDescType::Dnnl & rhs.getType()) { + auto* dnnMemDesc = rhs.as(); + return isCompatible(*dnnMemDesc); } else { return false; } } +bool DnnlMemoryDesc::isCompatible(const DnnlMemoryDesc& rhs) const { + return this->desc == rhs.desc; +} + std::string DnnlMemoryDesc::serializeFormat() const { dnnl::impl::memory_desc_wrapper wrapped(desc.get()); if (wrapped.is_wino_desc()) { diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h index c6a88794485c40..373e66679f8824 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h @@ -26,6 +26,7 @@ class DnnlMemoryDesc : public virtual MemoryDesc { MemoryDescPtr cloneWithNewPrecision(const InferenceEngine::Precision prec) const override; bool isCompatible(const MemoryDesc& rhs) const override; + bool isCompatible(const DnnlMemoryDesc& rhs) const; bool hasLayoutType(LayoutType layoutType) const override { return false; } diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 03529f39d1c003..64752ea8692fdd 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -550,12 +550,6 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di return {memory::format_tag::any}; } -void Node::execute(dnnl::stream strm) { - if (prim) { - prim.execute(strm, primArgs); - } -} - void Node::updateShapes() { IE_ASSERT(isDynamicNode()) << "Node::updateShapes() is called to a static shape node of type: " << getTypeStr() << " with name: " << getName(); if (needShapeInfer()) { diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 0d15441972af92..dd78bfd0159b85 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -334,7 +334,7 @@ class Node { void resolveInPlaceEdges(); - virtual void execute(dnnl::stream strm); + virtual void execute(dnnl::stream strm) = 0; void updateShapes(); void updateDynamicParams(); void executeDynamic(dnnl::stream strm); @@ -578,7 +578,6 @@ class Node { std::vector supportedPrimitiveDescriptors; std::unordered_map primArgs; std::unordered_map postOpsArgs; - dnnl::primitive prim; std::vector descs; const GraphContext::CPtr context; @@ -649,9 +648,10 @@ class Node { IE_THROW(NotImplemented) << "[DS] prapareParams not implemented for node with type " << NameFromType(getType()); } - MemoryPtr getScratchPadMem(const const_dnnl_primitive_desc_t& pd) { - auto scratchpadMemoryDesc = DnnlExtensionUtils::query_md(pd, dnnl::query::scratchpad_md); - scratchpadMem 
= context->getScratchPad()->createScratchPadMem(scratchpadMemoryDesc); + MemoryPtr getScratchPadMem(const DnnlMemoryDescPtr& desc) { + if (!scratchpadMem || !scratchpadMem->getDesc().isCompatible(*desc)) { + scratchpadMem = context->getScratchPad()->createScratchPadMem(desc); + } return scratchpadMem; } diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp index 3f055cc63fe039..7d337457494de9 100644 --- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp @@ -9,6 +9,14 @@ using namespace dnnl; namespace ov { namespace intel_cpu { +DnnlExecutor::DnnlExecutor(const dnnl::primitive_desc& pd) { + execPrim = dnnl::primitive(pd); + src_md = DnnlExtensionUtils::makeDescriptor(pd.src_desc()); + dst_md = DnnlExtensionUtils::makeDescriptor(pd.dst_desc()); + wghts_md = DnnlExtensionUtils::makeDescriptor(pd.weights_desc()); + scrch_md = DnnlExtensionUtils::makeDescriptor(pd.scratchpad_desc()); +} + DnnlExecutor::IntermReorder::IntermReorder(const dnnl::memory::desc& descSrc, const dnnl::memory::desc& descDst, const dnnl::engine& engine) : m_descSrc(descSrc), m_descDst(descDst) { @@ -20,7 +28,15 @@ void DnnlExecutor::IntermReorder::exec(dnnl::memory& memSrc, dnnl::memory& memDs m_reorder.execute(strm, memSrc, memDst); } -void DnnlExecutor::exec(std::unordered_map primArgs, dnnl::stream strm) { +void DnnlExecutor::exec(const std::unordered_map& primArgs, dnnl::stream strm) { + if (inputReorders.empty() && outputReorders.empty()) { + execPrim.execute(strm, primArgs); + } else { + reorder_exec(primArgs, strm); + } +} + +void DnnlExecutor::reorder_exec(std::unordered_map primArgs, dnnl::stream strm) { for (auto &inReorder : inputReorders) { if (primArgs.count(inReorder.first)) { dnnl::memory memDst(inReorder.second.getDstDesc(), strm.get_engine()); @@ -58,27 +74,6 @@ const_dnnl_primitive_desc_t DnnlExecutor::getPrimitiveDesc() const { return execPrim.get_primitive_desc(); } -dnnl::memory::desc DnnlExecutor::getSrcDesc() const { - auto pd = getPrimitiveDesc(); - auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::src_md); - - return md->getDnnlDesc(); -} - -dnnl::memory::desc DnnlExecutor::getWeightDesc() const { - auto pd = getPrimitiveDesc(); - auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::weights_md); - - return md->getDnnlDesc(); -} - -dnnl::memory::desc DnnlExecutor::getDstDesc() const { - auto pd = getPrimitiveDesc(); - auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::dst_md); - - return md->getDnnlDesc(); -} - impl_desc_type DnnlExecutor::getImplementationType() const { auto pd = getPrimitiveDesc(); return parse_impl_name(DnnlExtensionUtils::query_impl_info_str(pd)); diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h index f824fd8146ecb6..0f3eff13797eef 100644 --- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h +++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h @@ -26,22 +26,52 @@ class DnnlExecutor { }; public: - void exec(std::unordered_map primArgs, dnnl::stream strm); + explicit DnnlExecutor(const dnnl::primitive_desc& pd); + void exec(const std::unordered_map& primArgs, dnnl::stream strm); bool needReordering() const; virtual ~DnnlExecutor() = default; dnnl::primitive getExecPrim() const; const_dnnl_primitive_desc_t getPrimitiveDesc() const; - dnnl::memory::desc getSrcDesc() const; - dnnl::memory::desc getWeightDesc() const; - 
dnnl::memory::desc getDstDesc() const; impl_desc_type getImplementationType() const; + DnnlMemoryDescPtr getSrcDesc() const { + return src_md; + } + DnnlMemoryDescPtr getWeightDesc() const { + return wghts_md; + } + DnnlMemoryDescPtr getDstDesc() const { + return dst_md; + } + DnnlMemoryDescPtr getScratchPadDesc() const { + return scrch_md; + } + + const dnnl::memory::desc& getDnnlSrcDesc() const { + return src_md->getDnnlDesc(); + } + const dnnl::memory::desc& getDnnlWeightDesc() const { + return wghts_md->getDnnlDesc(); + } + const dnnl::memory::desc& getDnnlDstDesc() const { + return dst_md->getDnnlDesc(); + } + const dnnl::memory::desc& getDnnlScratchPadDesc() const { + return scrch_md->getDnnlDesc(); + } + + protected: + void reorder_exec(std::unordered_map primArgs, dnnl::stream strm); + protected: - DnnlExecutor() = default; dnnl::primitive execPrim; // key is the port number for the primitive that needs memory reordering std::unordered_map inputReorders; std::unordered_map outputReorders; + DnnlMemoryDescPtr src_md; + DnnlMemoryDescPtr wghts_md; + DnnlMemoryDescPtr dst_md; + DnnlMemoryDescPtr scrch_md; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index 9a0a8a66274321..32831bcede332a 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -52,6 +52,7 @@ class Concat : public Node { InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32; bool canExecRef = false; static constexpr size_t MAX_RANK_REF = 6; + dnnl::primitive prim; }; } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index ab07b6521e71f7..ab2f07c5d9ca10 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -1490,8 +1490,7 @@ void Convolution::prepareParams() { Node::appendPostOpArgs(*pAttrLocal, primArgs, convPostOpsArgs[preferLegacyPostOps]); - auto pd = execPtr->getPrimitiveDesc(); - auto scratchpadMem = getScratchPadMem(pd); + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); #ifdef CPU_DEBUG_CAPS @@ -1508,19 +1507,17 @@ Convolution::ConvolutionExecutor::ConvolutionExecutor(const dnnl::convolution_fo const dnnl::memory::desc& inMemDesc, const dnnl::memory::desc& weightMemDesc, const dnnl::memory::desc& outMemDesc, - const dnnl::engine& engine) { - execPrim = dnnl::convolution_forward(pd); - - if (inMemDesc != pd.src_desc()) { - inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)}); + const dnnl::engine& engine) : DnnlExecutor(pd) { + if (inMemDesc != getDnnlSrcDesc()) { + inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, getDnnlSrcDesc(), engine)}); } - if (weightMemDesc != pd.weights_desc()) { - inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)}); + if (weightMemDesc != getDnnlWeightDesc()) { + inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, getDnnlWeightDesc(), engine)}); } - if (outMemDesc != pd.dst_desc()) { - outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)}); + if (outMemDesc != getDnnlDstDesc()) { + outputReorders.insert({DNNL_ARG_DST, IntermReorder(getDnnlDstDesc(), outMemDesc, engine)}); } } diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 
db013ced146e6d..2395a4a6af2a8d 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -991,8 +991,7 @@ void Deconvolution::prepareParams() { } Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); - auto pd = execPtr->getPrimitiveDesc(); - auto scratchpadMem = getScratchPadMem(pd); + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { @@ -1094,9 +1093,7 @@ Deconvolution::DeconvExecutorDefault::DeconvExecutorDefault(const dnnl::convolut const dnnl::memory::desc& inMemDesc, const dnnl::memory::desc& weightMemDesc, const dnnl::memory::desc& outMemDesc, - const dnnl::engine& engine) { - execPrim = dnnl::convolution_backward_data(pd); - + const dnnl::engine& engine) : DnnlExecutor(pd) { if (inMemDesc != pd.diff_dst_desc()) { inputReorders.insert({DNNL_ARG_DIFF_DST, IntermReorder(inMemDesc, pd.diff_dst_desc(), engine)}); } @@ -1114,19 +1111,17 @@ Deconvolution::DeconvExecutorInt8::DeconvExecutorInt8(const dnnl::deconvolution_ const dnnl::memory::desc& inMemDesc, const dnnl::memory::desc& weightMemDesc, const dnnl::memory::desc& outMemDesc, - const dnnl::engine& engine) { - execPrim = dnnl::deconvolution_forward(pd); - - if (inMemDesc != pd.src_desc()) { - inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)}); + const dnnl::engine& engine) : DnnlExecutor(pd) { + if (inMemDesc != getDnnlSrcDesc()) { + inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, getDnnlSrcDesc(), engine)}); } - if (weightMemDesc != pd.weights_desc()) { - inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)}); + if (weightMemDesc != getDnnlWeightDesc()) { + inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, getDnnlWeightDesc(), engine)}); } - if (outMemDesc != pd.dst_desc()) { - outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)}); + if (outMemDesc != getDnnlDstDesc()) { + outputReorders.insert({DNNL_ARG_DST, IntermReorder(getDnnlDstDesc(), outMemDesc, engine)}); } } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 6b4c8e43521426..3d9cb3035cdf55 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -311,7 +311,7 @@ void FullyConnected::prepareParams() { implementationTypeIP, useConv1x1}; - auto engine = getEngine(); + auto& engine = getEngine(); auto builder = [&engine](const FCKey& key) -> executorPtr { executorPtr execPtr = nullptr; @@ -333,7 +333,7 @@ void FullyConnected::prepareParams() { } if (prim_desc) { - execPtr = std::make_shared(prim_desc); + execPtr = std::make_shared(prim_desc); } } // fallback @@ -388,7 +388,7 @@ void FullyConnected::prepareParams() { } } - execPtr = std::make_shared(prim_desc); + execPtr = std::make_shared(prim_desc); } return execPtr; }; @@ -404,26 +404,20 @@ void FullyConnected::prepareParams() { execPtr = result.first; if (execPtr) { - // no executor yet or shapes changed - if (!prevExecPtr || prevExecPtr->getSrcDesc() != execPtr->getSrcDesc()) { - auto oldMem = srcMemPtr->GetPrimitive(); - // fast path: wanted is same with parent node output, typical is static shape with inner product - if (execPtr->getSrcDesc() == inDesc->getDnnlDesc()) { - primArgs[DNNL_ARG_SRC] = std::move(oldMem); - } else { - 
primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getSrcDesc(), oldMem.get_engine(), oldMem.get_data_handle()); - } + if (execPtr->getSrcDesc()->isCompatible(*inDesc)) { + primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); + } else { + primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getDnnlSrcDesc(), engine, srcMemPtr->GetData()); } - if (!prevExecPtr || prevExecPtr->getDstDesc() != execPtr->getDstDesc()) { - auto oldMem = dstMemPtr->GetPrimitive(); - if (execPtr->getDstDesc() == outDesc->getDnnlDesc()) { - primArgs[DNNL_ARG_DST] = std::move(oldMem); - } else { - primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDstDesc(), oldMem.get_engine(), oldMem.get_data_handle()); - } + + if (execPtr->getDstDesc()->isCompatible(*outDesc)) { + primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDnnlDstDesc(), engine, dstMemPtr->GetData()); } - if (!prevExecPtr || prevExecPtr->getWeightDesc() != execPtr->getWeightDesc()) { - primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(DnnlExtensionUtils::makeDescriptor(execPtr->getWeightDesc()))->GetPrimitive(); + + if (!prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { + primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive(); } // changed shapes may also cause the kernel type changed selected_pd->setImplementationType(execPtr->getImplementationType()); @@ -438,9 +432,8 @@ void FullyConnected::prepareParams() { primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive(); } - auto pd = execPtr->getPrimitiveDesc(); - auto scratchpadMem = getScratchPadMem(pd); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + auto schratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); + primArgs[DNNL_ARG_SCRATCHPAD] = schratchpadMem->GetPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { DEBUG_LOG("verbose##", getName(), "##", pd->info(), "\n"); @@ -919,14 +912,6 @@ bool FullyConnected::canBeExecutedInConv1x1() const { return retVal; } -FullyConnected::ExecutorInnerProduct::ExecutorInnerProduct(const dnnl::inner_product_forward::primitive_desc& pd) { - execPrim = dnnl::inner_product_forward(pd); -} - -FullyConnected::ExecutorConv1x1::ExecutorConv1x1(const dnnl::convolution_forward::primitive_desc& pd) { - execPrim = dnnl::convolution_forward(pd); -} - MemoryPtr FullyConnected::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { if (!getParentEdgeAt(1)->getParent()->isConstant()) IE_THROW() << "Weight input is not const for node " << getName() << "."; diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index 4de5dff882649d..3f0983f2fc2a77 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -90,16 +90,6 @@ class FullyConnected : public Node { std::unordered_map privateWeightCache; dnnl::primitive_attr attr; - class ExecutorInnerProduct : public DnnlExecutor { - public: - ExecutorInnerProduct(const dnnl::inner_product_forward::primitive_desc& pd); - }; - - class ExecutorConv1x1 : public DnnlExecutor { - public: - ExecutorConv1x1(const dnnl::convolution_forward::primitive_desc& pd); - }; - static dnnl::convolution_forward::primitive_desc createDescriptorInternalForConv(DnnlMemoryDescCPtr inputDescPtr, DnnlMemoryDescCPtr weightDescPtr, diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h index d3c05b721da6f0..71ae6b91e7660c 
100644 --- a/src/plugins/intel_cpu/src/nodes/input.h +++ b/src/plugins/intel_cpu/src/nodes/input.h @@ -31,6 +31,7 @@ class Input : public Node { void withMeanImage(); MemoryCPtr getMemoryPtr() const; + void execute(dnnl::stream strm) override {} void executeDynamicImpl(dnnl::stream strm) override {} bool isExecutable() const override { return false; diff --git a/src/plugins/intel_cpu/src/nodes/interaction.h b/src/plugins/intel_cpu/src/nodes/interaction.h index 661cfc22de8b88..122ae3b2addc8c 100644 --- a/src/plugins/intel_cpu/src/nodes/interaction.h +++ b/src/plugins/intel_cpu/src/nodes/interaction.h @@ -60,6 +60,7 @@ class Interaction : public Node { private: void execRef(dnnl::stream strm); + dnnl::primitive prim; size_t batchSize = 0; size_t featureSize = 0; size_t inputSizes = 0; diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index 5cc0dce6230eae..f5f8995626d3e4 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -182,7 +182,7 @@ void Lrn::prepareParams() { LrnKey key = {inpDesc, selected_pd->getImplementationType(), alg, size, k, alpha, beta, attr}; auto engine = getEngine(); - auto builder = [&engine](const LrnKey& key) -> dnnl::primitive { + auto builder = [&engine](const LrnKey& key) -> executorPtr { auto desc = std::make_shared( engine, dnnl::prop_kind::forward_inference, @@ -205,25 +205,24 @@ void Lrn::prepareParams() { break; } if (!itpd.next_impl()) - return dnnl::lrn_forward(); + return nullptr; } - return dnnl::lrn_forward(prim_desc); + return std::make_shared(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - auto src = srcMemPtr->GetPrimitive(); - auto dst = dstMemPtr->GetPrimitive(); - primArgs = { {DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()} }; + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); } bool Lrn::created() const { @@ -250,6 +249,14 @@ void Lrn::createDescriptor(const std::vector &inputDesc, descs.push_back(desc); } +void Lrn::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << errorPrefix << " doesn't have an initialized executor"; + } +} + void Lrn::executeDynamicImpl(dnnl::stream strm) { execute(strm); } diff --git a/src/plugins/intel_cpu/src/nodes/lrn.h b/src/plugins/intel_cpu/src/nodes/lrn.h index b821fa8b70e521..c1635261f70faf 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.h +++ b/src/plugins/intel_cpu/src/nodes/lrn.h @@ -9,6 +9,7 @@ #include #include #include +#include "common/dnnl_executor.h" namespace ov { namespace intel_cpu { @@ -31,11 +32,14 @@ class Lrn : public Node { } void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; dnnl::algorithm alg; size_t size = 1; int k = 1; diff --git 
a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index c1c1381e6631f2..4027c2d08e30b8 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -593,7 +593,7 @@ void MatMul::prepareParams() { auto engine = getEngine(); - auto builder = [&engine](const MatMulKey& key) -> dnnl::primitive { + auto builder = [&engine](const MatMulKey& key) -> executorPtr { dnnl::matmul::primitive_desc matmul_desc; if (key.bias) { @@ -633,22 +633,20 @@ void MatMul::prepareParams() { break; } } - return matmul(prim_desc); + return std::make_shared<DnnlExecutor>(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); - - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); primArgs[DNNL_ARG_SRC_0] = src0MemPtr->GetPrimitive(); primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->GetPrimitive(); primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); @@ -658,6 +656,14 @@ void MatMul::prepareParams() { appendPostOpArgs(*attr, primArgs, postOpsArgs); } +void MatMul::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << errorPrefix << " doesn't have an initialized executor"; + } +} + void MatMul::executeDynamicImpl(dnnl::stream strm) { execute(strm); } diff --git a/src/plugins/intel_cpu/src/nodes/matmul.h b/src/plugins/intel_cpu/src/nodes/matmul.h index 5c8902483972b8..16d2140cbe5eee 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.h +++ b/src/plugins/intel_cpu/src/nodes/matmul.h @@ -10,6 +10,7 @@ #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" +#include "common/dnnl_executor.h" namespace ov { namespace intel_cpu { @@ -38,6 +39,7 @@ class MatMul : public Node { } void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept; @@ -48,6 +50,8 @@ class MatMul : public Node { AttrPtr initPrimitiveAttr(const VectorDims& dims); private: + using executorPtr = std::shared_ptr<DnnlExecutor>; + executorPtr execPtr = nullptr; dnnl::memory::desc getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc); std::pair<Shape, Shape> makeDummyInputShapes(const Shape& in0, const Shape& in1) const;
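Note how the builder's failure signalling changes in lockstep with the cached type: previously the params cache stored a raw dnnl primitive and an empty, default-constructed primitive meant "not found"; now it stores a shared DnnlExecutor and nullptr means "not found", which the callers test directly. Side by side, as a sketch only, with buildPrimDesc standing in for the per-node descriptor search (it is not a real helper in this patch):

// before: failure is an empty primitive, detected via the primitive's operator bool
auto builderOld = [&engine](const MatMulKey& key) -> dnnl::primitive {
    auto prim_desc = buildPrimDesc(engine, key);  // stand-in
    return prim_desc ? dnnl::matmul(prim_desc) : dnnl::matmul();
};

// after: failure is a null executor, detected as a plain shared_ptr check
auto builderNew = [&engine](const MatMulKey& key) -> std::shared_ptr<DnnlExecutor> {
    auto prim_desc = buildPrimDesc(engine, key);  // stand-in
    return prim_desc ? std::make_shared<DnnlExecutor>(prim_desc) : nullptr;
};

diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index fc56f8d812ce54..b31c358911904a 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -369,7 +369,7 @@ void Pooling::prepareParams() { alg, selected_pd->getImplementationType()}; auto engine = getEngine(); - auto builder = [&engine](const PoolingKey& key) -> dnnl::primitive { + auto builder = [&engine](const PoolingKey& key) -> executorPtr { primitive_desc_iterator itpd = createDescriptorHelper(engine, key.inp->getDnnlDesc(), key.out->getDnnlDesc(), @@ -393,27 +393,34 @@ void Pooling::prepareParams() { break; } - return pooling_forward(prim_desc); + return std::make_shared<DnnlExecutor>(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); -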
if (!result.first) { + execPtr = result.first; + + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; - - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()}}; + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); Node::appendPostOpArgs(*attr, primArgs, postOpsArgs); } +void Pooling::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << "Pooling node with name '" << getName() << "' doesn't have an initialized executor"; + } +} + void Pooling::executeDynamicImpl(dnnl::stream strm) { execute(strm); } diff --git a/src/plugins/intel_cpu/src/nodes/pooling.h b/src/plugins/intel_cpu/src/nodes/pooling.h index 2daaa3f9a528e8..6d76e3d48980a2 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.h +++ b/src/plugins/intel_cpu/src/nodes/pooling.h @@ -10,6 +10,7 @@ #include #include #include +#include "common/dnnl_executor.h" namespace ov { namespace intel_cpu { @@ -30,6 +31,7 @@ class Pooling : public Node { } void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -38,6 +40,9 @@ class Pooling : public Node { AttrPtr initPrimitiveAttr() override; private: + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; + void setPostOps(dnnl::primitive_attr &attr); void initEffectiveAttributes(const Shape &inDims, const Shape &outDims); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index 2efcd0e44b6e69..5dd5674abd9814 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -336,7 +336,11 @@ void Reorder::execute(dnnl::stream strm) { src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData()); dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData()); - Node::execute(strm); + if (prim) { + prim.execute(strm, primArgs); + } else { + IE_THROW() << "Reorder node with name " << getName() << " doesn't have an initialized primitive"; + } } } diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index f6091a6c91bd43..4bd3fa8fc3211b 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -66,6 +66,7 @@ class Reorder : public Node { static void reorderData(const Memory &input, const Memory &output, MultiCachePtr cache = nullptr); private: + dnnl::reorder::primitive prim; std::shared_ptr input; std::shared_ptr output; diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index e7b97b9355d214..4ed7ed7a4e5550 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -1062,7 +1062,7 @@ void RNN::prepareParams() { RNNKey key = { inDataDescs, outDataDescs, wDescs, cell_type, cell_act, 
direction, *attr }; auto engine = getEngine(); - auto builder = [&engine](const RNNKey& key) -> dnnl::primitive { + auto builder = [&engine](const RNNKey& key) -> executorPtr { const auto descPtr = createPrimitiveDescriptor(engine, key.cellType, key.cellAct, @@ -1072,23 +1072,22 @@ void RNN::prepareParams() { key.wDescs, key.attr); - return dnnl::primitive(descPtr); + return std::make_shared(descPtr); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; - - auto pd = prim.get_primitive_desc(); - scratchpadMem = getScratchPadMem(pd); + scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); if (!wasMemoryPrepared || wFormatWasChanged) { - auto pd = prim.get_primitive_desc(); + auto pd = execPtr->getPrimitiveDesc(); auto query_weights_md = [&](int idx = 0) -> dnnl::memory::desc { auto what = dnnl::convert_to_c(dnnl::query::weights_md); const_dnnl_memory_desc_t cdesc = dnnl_primitive_desc_query_md(pd, what, idx); @@ -1118,7 +1117,7 @@ std::shared_ptr RNN::getDstMemDesc(dnnl::primitive_desc_iterator& pr } void RNN::execute(dnnl::stream strm) { - if (!prim) + if (!execPtr) THROW_ERROR << "does not have initialized primitive to execute."; const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr(); @@ -1160,7 +1159,7 @@ void RNN::execute(dnnl::stream strm) { } } - prim.execute(strm, args); + execPtr->exec(args, strm); } void RNN::executeDynamicImpl(dnnl::stream strm) { diff --git a/src/plugins/intel_cpu/src/nodes/rnn.h b/src/plugins/intel_cpu/src/nodes/rnn.h index b94d026adcf75c..dbe4f9769d14b7 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.h +++ b/src/plugins/intel_cpu/src/nodes/rnn.h @@ -11,6 +11,8 @@ #include #include +#include "common/dnnl_executor.h" + namespace ov { namespace intel_cpu { namespace node { @@ -66,6 +68,9 @@ class RNN : public Node { void copyWeightsData(); + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; + /** Specify mode Cell or Seq. 
true - Cell, false - Seq */ bool is_cell = false; diff --git a/src/plugins/intel_cpu/src/nodes/softmax.cpp b/src/plugins/intel_cpu/src/nodes/softmax.cpp index 7f3d3c337e5792..65176e4a7c7907 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/softmax.cpp @@ -170,7 +170,7 @@ void SoftMax::prepareParams() { SoftmaxKey key = {inpDesc, selected_pd->getImplementationType(), axis, *attr}; auto engine = getEngine(); - auto builder = [&engine](const SoftmaxKey& key) -> dnnl::primitive { + auto builder = [&engine](const SoftmaxKey& key) -> executorPtr { softmax_forward::primitive_desc prim_desc; auto desc = std::make_shared( engine, @@ -196,26 +196,32 @@ void SoftMax::prepareParams() { break; } if (!itpd.next_impl()) - return softmax_forward(); + return nullptr; } - return softmax_forward(prim_desc); + return std::make_shared(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); +} - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()}}; +void SoftMax::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << "Softmax node with name '" << getName() << "' doesn't have an initialized executor"; + } } void SoftMax::executeDynamicImpl(dnnl::stream strm) { diff --git a/src/plugins/intel_cpu/src/nodes/softmax.h b/src/plugins/intel_cpu/src/nodes/softmax.h index 78fc51115a18d7..1a472075168406 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.h +++ b/src/plugins/intel_cpu/src/nodes/softmax.h @@ -11,6 +11,8 @@ #include #include +#include "common/dnnl_executor.h" + namespace ov { namespace intel_cpu { namespace node { @@ -26,11 +28,14 @@ class SoftMax : public Node { bool created() const override; AttrPtr initPrimitiveAttr() override; void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; size_t axis = 0; }; diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h index b13bc1a0a745ab..03988d24fe8367 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.h +++ b/src/plugins/intel_cpu/src/nodes/transpose.h @@ -48,6 +48,7 @@ class Transpose : public Node { }; using executorPtr = std::shared_ptr; executorPtr execPtr = nullptr; + dnnl::primitive prim; struct TransposeJitExecutor : public TransposeExecutor { TransposeJitExecutor(const PermuteParams& params); From fb24e9141629451a92b7c814fa6738486bb3dc13 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Thu, 23 Mar 2023 13:24:10 +0000 Subject: [PATCH 061/296] [LPT] NNCF 
GroupConvolution 5D on weights support (#16336) * [LPT] NNCF GroupConvolution 5D on weights support * PullReshapeThroughDequantization rollback --- .../src/convolution.cpp | 16 +- .../src/weightable_layer_transformation.cpp | 12 +- .../group_convolution_transformation.cpp | 1180 ++++++++++------- ..._through_dequantization_transformation.cpp | 165 ++- ..._through_dequantization_transformation.cpp | 86 +- .../groupconvolution_qdq_transformation.cpp | 121 ++ .../groupconvolution_qdq_transformation.cpp | 121 ++ .../group_convolution_function.hpp | 3 +- .../src/group_convolution_function.cpp | 60 +- 9 files changed, 1170 insertions(+), 594 deletions(-) diff --git a/src/common/low_precision_transformations/src/convolution.cpp b/src/common/low_precision_transformations/src/convolution.cpp index 46831f0586c5cf..4bd2dd31f24534 100644 --- a/src/common/low_precision_transformations/src/convolution.cpp +++ b/src/common/low_precision_transformations/src/convolution.cpp @@ -237,8 +237,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph Shape newScaleShape = newScalePShape.to_shape(); if (!newScaleShape.empty()) { - // that's all we need: [C, 1, 1, 1] => [C, 1, 1] - newScaleShape.pop_back(); + const auto input_shape = convolution->get_input_partial_shape(0); + const auto diff = newScaleShape.size() - input_shape.size(); + OPENVINO_ASSERT( + newScaleShape.empty() || (diff <= 2ull), + "unexpected shape size on weights"); + + for (size_t i = 0; i <= diff; ++i) { + newScaleShape.pop_back(); + } } if (reshapeFromWeights != nullptr) { @@ -282,7 +289,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph const size_t weightsRankValue = weightsPShape.rank().get_length(); Shape zeroPointShape(weightsRankValue, 1ul); + // output channel or group zeroPointShape[0] = static_cast<size_t>(weightsPShape[0].get_length()); + if ((reshapeFromWeights == nullptr) && (weightsRankValue == 5ull)) { + // output channel + zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length()); + } auto zeroPointConstant = fold<opset1::Broadcast>( subtractFromWeights->input_value(1),
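The net effect of the shape trimming above is easiest to see with concrete numbers. A worked example in the spirit of that code, using the shapes from the new 5D test case added further down (values are illustrative only; Shape is the ngraph shape type, a vector of size_t):

// NNCF-style 5D group weights {3, 8, 2, 7, 7}: 3 groups, 8 output channels per group
Shape newScaleShape{3, 8, 1, 1, 1};          // per-channel scale on the weights, rank 5
const auto diff = newScaleShape.size() - 4;  // data input is 4D, so diff == 1
for (size_t i = 0; i <= diff; ++i)           // pops diff + 1 trailing ones
    newScaleShape.pop_back();                // {3, 8, 1, 1, 1} -> {3, 8, 1}
// plain 4D weights keep the old behaviour: diff == 0, one pop, {C, 1, 1, 1} -> {C, 1, 1}

// zero point: with no Reshape on weights and rank-5 weights, both the group
// dimension and the per-group output channel dimension are preserved
Shape zeroPointShape(5, 1ul);
zeroPointShape[0] = 3;                       // groups
zeroPointShape[1] = 8;                       // output channels inside each group
// -> {3, 8, 1, 1, 1}, matching the expected Subtract constant in the 5D test case below

diff --git a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp index 1cfe4bb51d3ac7..1837f21635235c 100644 --- a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -230,16 +230,16 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptrget_input_node_shared_ptr(1); - if (!ov::is_type(reshape)) { - return false; - } + std::shared_ptr parent = ov::is_type(reshape) ?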
+ reshape->get_input_node_shared_ptr(0) : + reshape; - if (ov::is_type(reshape->get_input_node_shared_ptr(0))) { - const std::shared_ptr fq = ov::as_type_ptr(reshape->get_input_node_shared_ptr(0)); + const auto fq = ov::as_type_ptr(parent); + if (fq != nullptr) { return NetworkHelper::isQuantizeSupported(fq); } - dequantizationOnWeights = NetworkHelper::getDequantization(reshape, defaultPrecisions, 0); + dequantizationOnWeights = NetworkHelper::getDequantization(parent, defaultPrecisions, 0, true); } else if (ov::is_type(layer->get_input_node_shared_ptr(1))) { const std::shared_ptr fq = ov::as_type_ptr(layer->get_input_node_shared_ptr(1)); return NetworkHelper::isQuantizeSupported(fq); diff --git a/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp b/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp index b20d211d651adc..d148c370dfd2f8 100644 --- a/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp +++ b/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp @@ -48,6 +48,7 @@ class GroupConvolutionTestValues { TestTransformationParams params; size_t group; int groupCalculationDimention; + bool addReshape; Actual actual; Expected expected; }; @@ -76,7 +77,8 @@ class GroupConvolutionTransformation : public LayerTransformation, testValues.actual.dequantizationOnWeights, ngraph::element::f32, {}, - ngraph::element::f32); + ngraph::element::f32, + testValues.addReshape); SimpleLowPrecisionTransformer transform; transform.add( @@ -101,7 +103,8 @@ class GroupConvolutionTransformation : public LayerTransformation, testValues.expected.dequantizationOnWeights, testValues.expected.precisionAfterOperation, testValues.expected.dequantizationAfter, - testValues.expected.precisionAfterDequantization); + testValues.expected.precisionAfterDequantization, + testValues.addReshape); } static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -113,7 +116,9 @@ class GroupConvolutionTransformation : public LayerTransformation, result << toString(testValues.params) << "_" << inputShape << "_" << outputShape << "_" << testValues.group << "_" << testValues.groupCalculationDimention << "_" << testValues.actual.precisionBeforeDequantization << "_" << testValues.actual.dequantization << "_" - << "_weights_" << testValues.actual.weights->get_element_type() << "_" + << "_add_reshape:" << testValues.addReshape << "_" + << "_weights_type:" << testValues.actual.weights->get_element_type() << "_" + << "_weights_shape:" << testValues.actual.weights->get_shape() << "_" << "{ " << testValues.actual.weights->cast_vector()[0] << " }_" << testValues.actual.fakeQuantizeOnWeights << "_"; return result.str(); @@ -128,333 +133,520 @@ TEST_P(GroupConvolutionTransformation, CompareFunctions) { ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique"; } +// clang-format off namespace testValues1 { + const std::vector> shapesForGroupConv = { {{1, 6, 224, 224}, {1, 24, 218, 218}}, - {{-1, -1, -1, -1}, {-1, -1, -1, -1}}}; + {{-1, -1, -1, -1}, {-1, -1, -1, -1}} +}; const std::vector testValuesGroupConv = { // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, 
{254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - 0, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + 0, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - 1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + 1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, 
{-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(false), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, + // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::f32, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, + // group convolution, per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - 
ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, + // group convolution, per-channel quantization with the same values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - { - ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - }, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, + } + }, + // group convolution, without zero point, without convert - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), - {}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + 
ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), + {}, + {}, + ngraph::element::f32, + {} + } + }, + // group convolution, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{element::f32}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{element::f32}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {ngraph::element::f32, {}, {0.01f}}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + {ngraph::element::f32, {}, {0.01f}} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } 
+ }, + } + }, // per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, - {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, - {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {ngraph::element::f32, {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, {0.01f}}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {{}, - {std::vector(24ul, 127.f), - ngraph::element::f32, - {24, 1, 1, 1}, - false, - 1, - ngraph::element::i8, - false, - {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}}}, - {}}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, + {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + { + ngraph::element::f32, + {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, + {0.01f} + } + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, + {} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + { + {}, + { + std::vector(24ul, 127.f), + ngraph::element::f32, + {24, 1, 1, 1}, + false, + 1, + ngraph::element::i8, + false, + {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}} + }, + {} + }, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, + + // per-channel quantization with different values, without zero point, no reshape - 5D weights + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + false, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, + {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{3,8,2,7,7}, std::vector{2.f}), + {}, + 
{ + ngraph::element::f32, + {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, + {0.01f} + } + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, + {} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{3,8,2,7,7}, std::vector{2.f}), + {}, + { + {}, + { + std::vector(24ul, 127.f), + ngraph::element::f32, + {3, 8, 1, 1, 1}, + false, + 1, + ngraph::element::i8, + false, + {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}} + }, + {} + }, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -472,146 +664,201 @@ const std::vector> shapesF const std::vector testValuesForDepthWiseConv = { // depth-wise convolution, per-tensor quantization, with zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // depth-wise convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + 
ngraph::element::f32, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // depth-wise convolution, per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{ - 0.0002f, - 0.0002f, // 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f // 0.0008 = 0.08 (on data) * 0.01 (on weights) - }, - ngraph::element::f32, - {1, 6, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + 0.0002f, + 0.0002f, // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f // 0.0008 = 0.08 (on data) * 0.01 (on weights) + }, + ngraph::element::f32, + {1, 6, 1, 1} + } + }, + } + }, + // depth-wise convolution, per-tensor quantization with the same values, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, + } + }, + // depth-wise convolution, without zero point, without convert - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 
1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), - {}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), + {}, + {}, + ngraph::element::f32, + {} + } + }, + // depth-wise convolution, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{element::f32}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{element::f32}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // without dequantization operations - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::f32, - {}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -623,27 +870,35 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, namespace testValues3 { const std::vector> shapesWithDynamicChannel = { - {PartialShape::dynamic(), PartialShape::dynamic()}}; + {PartialShape::dynamic(), PartialShape::dynamic()} +}; const std::vector testValuesWithDynamicChannel = { // depth-wise convolution, per-tensor quantization, with zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), 
{0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -652,3 +907,4 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, ::testing::ValuesIn(testValuesWithDynamicChannel)), GroupConvolutionTransformation::getTestCaseName); } // namespace testValues3 +// clang-format on diff --git a/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp b/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp index 14e8f4361ceb22..b15a8f3b784c92 100644 --- a/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp +++ b/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp @@ -133,11 +133,17 @@ TEST_P(PullReshapeThroughDequantizationTransformation, CompareFunctions) { ASSERT_TRUE(res.first) << res.second; } -const std::vector inputShapes = {ngraph::Shape({1, 960, 7, 7}), ngraph::Shape({4, 960, 7, 7})}; +// clang-format off + +const std::vector inputShapes = { + ngraph::Shape({1, 960, 7, 7}), + ngraph::Shape({4, 960, 7, 7}) +}; const std::vector> dequantizationOnWeightElementwiseConstantShapes = { {ngraph::Shape({1, 960}), ngraph::Shape({960, 1, 1, 1})}, - {ngraph::Shape({9, 960}), ngraph::Shape({960, 1, 3, 3})}}; + {ngraph::Shape({9, 960}), ngraph::Shape({960, 1, 3, 3})} +}; const std::vector multiplyShapes = {ngraph::Shape({1, 1, 960, 1})}; @@ -193,37 +199,51 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {9, 960}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {{3, 3, 960, 1}}, - {{2}, element::f32, {/* from parameter: multiplyShapes */}, false}, - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.06f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}, + { + 
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {9, 960}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false }, + { {0.03f}, element::f32, {/* from parameter */}, false } + }, + { {3, 3, 960, 1} }, + { {2}, element::f32, {/* from parameter: multiplyShapes */}, false }, + { {2, 3, 0, 1} }, + { {960, 1, 1, 3, 3} }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {960, 1, 3, 3}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false }, + { {0.06f}, element::f32, {/* from parameter */}, false } + }, + {}, + {}, + {}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + } + }, // Subtract with Convert + Constant // Actual: @@ -276,37 +296,54 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {9, 960}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {{3, 3, 960, 1}}, - {{2}, element::f32, {/* from parameter: multiplyShapes */}, false}, - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true}, - {{0.06f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}}; + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {9, 960}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true }, + { {0.03f}, element::f32, {/* from parameter */}, false } + }, + { {3, 3, 960, 1} }, + { {2}, element::f32, {/* from parameter: multiplyShapes */}, false }, + { {2, 3, 0, 1} }, + { {960, 1, 1, 3, 3} }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {960, 1, 3, 3}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true }, + { {0.06f}, element::f32, {/* from parameter */}, false } 
+ }, + {}, + {}, + {}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + } + } +}; + +// clang-format on INSTANTIATE_TEST_SUITE_P(smoke_LPT, PullReshapeThroughDequantizationTransformation, diff --git a/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp b/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp index 8b5d9a0b6447b8..6979bdcc36616d 100644 --- a/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp +++ b/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp @@ -126,7 +126,12 @@ TEST_P(PullTransposeThroughDequantizationTransformation, CompareFunctions) { ASSERT_TRUE(res.first) << res.second; } -const std::vector inputShapes = {ngraph::Shape({1, 960, 7, 7}), ngraph::Shape({4, 960, 7, 7})}; +// clang-format off + +const std::vector inputShapes = { + ngraph::Shape({1, 960, 7, 7}), + ngraph::Shape({4, 960, 7, 7}) +}; const std::vector> dequantizationOnWeightElementwiseConstantShapes = { {ngraph::Shape({}), ngraph::Shape({1, 1, 1, 1})}, @@ -178,37 +183,54 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {3, 3, 960, 1}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {}, // reshape1 - {}, // multiply - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}}; + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, + {{0.02f}, element::f32, {}, false} + }, + {std::vector{2.f}, ngraph::element::i8, {3, 3, 960, 1}}, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {/* from parameter */}, false}, + {{0.03f}, element::f32, {/* from parameter */}, false} + }, + {}, // reshape1 + {}, // multiply + {{2, 3, 0, 1}}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, + {{0.02f}, element::f32, {}, false} + }, + {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {/* from parameter */}, false}, + {{0.03f}, element::f32, {/* from parameter */}, false} + }, + {}, + {}, + {}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + } + } +}; + +// clang-format on INSTANTIATE_TEST_SUITE_P(smoke_LPT, PullTransposeThroughDequantizationTransformation, diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp index c8d8473ad1468e..bc058ef87c7add 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp @@ -11,6 +11,8 @@ using namespace LayerTestsDefinitions; namespace { +// clang-format off + const std::vector netPrecisions = { ngraph::element::f32, // ngraph::element::f16 @@ -370,6 +372,66 @@ const std::vector true, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + { {126.f, 127.f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false, 1ul, ngraph::element::i8, true }, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "FP32", + true, + }, + // Actual: // // FQ @@ -427,6 +489,63 @@ const std::vector false, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution + // + // Transformed: + // + // FQ Constant + // \U8 /U8 + // \ / + // Subtract + // \FP32 + // \ Constant + // \ /I8 + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "U8", + false, + }, + // Actual: // // FQ @@ -500,4 +619,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, GroupConvolutionQDqTransformation, ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(params)), GroupConvolutionQDqTransformation::getTestCaseName); + +// clang-format on } // namespace diff --git 
a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp index 0794065e13e5f7..946554d0f7f2ea 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp @@ -11,6 +11,8 @@ using namespace LayerTestsDefinitions; namespace { +// clang-format off + const std::vector netPrecisions = { ngraph::element::f32, // ngraph::element::f16 @@ -370,6 +372,66 @@ const std::vector true, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + { {126.f, 127.f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false, 1ul, ngraph::element::i8, true }, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "FP32", + true, + }, + // Actual: // // FQ @@ -427,6 +489,63 @@ const std::vector false, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution + // + // Transformed: + // + // FQ Constant + // \U8 /U8 + // \ / + // Subtract + // \FP32 + // \ Constant + // \ /I8 + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "U8", + false, + }, + // Actual: // // FQ @@ -500,4 +619,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, GroupConvolutionQDqTransformation, ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(params)), GroupConvolutionQDqTransformation::getTestCaseName); + +// clang-format on } // namespace diff --git 
a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp index e90c32eb00bf46..20101a88d57745 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp @@ -49,7 +49,8 @@ class GroupConvolutionFunction { const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, - const ngraph::element::Type precisionAfterDequantization); + const ngraph::element::Type precisionAfterDequantization, + const bool addReshape); }; } // namespace subgraph diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp index 6af36be45295cd..953e52326de7c7 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp @@ -31,7 +31,8 @@ std::shared_ptr createWeightsOriginal( const size_t kernelSize, const std::vector& weightsValues, const FakeQuantizeOnWeights& fakeQuantizeOnWeights, - const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights) { + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, + const bool addReshape = true) { std::shared_ptr weights; if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { weights = ngraph::opset1::Constant::create( @@ -46,9 +47,13 @@ std::shared_ptr createWeightsOriginal( const size_t inputChannelsPerGroup = inputChannelsCount / groupCount; weights = ngraph::opset1::Constant::create( precision, - rankLength == 3 ? - ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize } : - ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize, kernelSize }, + addReshape ? + (rankLength == 3 ? + ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize } : + ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize, kernelSize }) : + (rankLength == 3 ? + ngraph::Shape{ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize } : + ngraph::Shape{ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize, kernelSize }), weightsValues.size() == 1ul ? std::vector( rankLength == 3 ? @@ -75,24 +80,26 @@ std::shared_ptr createWeightsOriginal( weights = ngraph::builder::subgraph::makeDequantization(weights, dequantizationOnWeights); } - weights = std::make_shared( - weights, - ngraph::opset1::Constant::create( - element::i64, - Shape{ static_cast(rankLength) + 1ul }, - rankLength == 3 ? - std::vector { - calculatedDimention == 0 ? -1 : static_cast(groupCount), - calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), - static_cast(inputChannelsPerGroup), - static_cast(kernelSize) } : - std::vector { - calculatedDimention == 0 ? -1 : static_cast(groupCount), - calculatedDimention == 1 ? 
-1 : static_cast(outputChannelsCount / groupCount), - static_cast(inputChannelsPerGroup), - static_cast(kernelSize), - static_cast(kernelSize) }), - true); + if (addReshape) { + weights = std::make_shared( + weights, + ngraph::opset1::Constant::create( + element::i64, + Shape{ static_cast(rankLength) + 1ul }, + rankLength == 3 ? + std::vector { + calculatedDimention == 0 ? -1 : static_cast(groupCount), + calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), + static_cast(inputChannelsPerGroup), + static_cast(kernelSize) } : + std::vector { + calculatedDimention == 0 ? -1 : static_cast(groupCount), + calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), + static_cast(inputChannelsPerGroup), + static_cast(kernelSize), + static_cast(kernelSize) }), + true); + } } return weights; @@ -253,7 +260,8 @@ std::shared_ptr GroupConvolutionFunction::get( const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, - const ngraph::element::Type precisionAfterDequantization) { + const ngraph::element::Type precisionAfterDequantization, + const bool addReshape) { const auto rankLength = inputShape.rank().is_dynamic() ? 4 : inputShape.rank().get_length(); OPENVINO_ASSERT(rankLength == 3 || rankLength == 4, "not supported input shape rank: ", rankLength); @@ -269,9 +277,6 @@ std::shared_ptr GroupConvolutionFunction::get( const size_t outputChannelsInGroup = outputChannelsCount / groupCount; const size_t weightsSize = weightsConst->cast_vector().size(); - if ((weightsSize != 1ul) && (weightsSize != (inputChannelsCount * outputChannelsCount))) { - throw std::runtime_error("unexpected actual weights values size"); - } std::shared_ptr weights; if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { @@ -293,7 +298,8 @@ std::shared_ptr GroupConvolutionFunction::get( kernelSize, weightsConst->cast_vector(), fakeQuantizeOnWeights, - dequantizationOnWeights); + dequantizationOnWeights, + addReshape); } auto convolutionOriginal = ngraph::opset1::GroupConvolution( From 44d6d97871b81bd4c36ec358f760adccecc25310 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 14:47:54 +0100 Subject: [PATCH 062/296] DOCS shift to rst - OpenVINO 2.0 Deployment (#16509) --- .../migration_ov_2_0/deployment_migration.md | 212 ++++++++++-------- docs/OV_Runtime_UG/migration_ov_2_0/intro.md | 90 +++++--- docs/{img => _static/images}/tf_openvino.svg | 0 3 files changed, 168 insertions(+), 134 deletions(-) rename docs/{img => _static/images}/tf_openvino.svg (100%) diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md b/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md index f5d9a1c4213ca1..46d8a693094cc9 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md @@ -1,151 +1,161 @@ # Installation & Deployment {#openvino_2_0_deployment} +@sphinxdirective + One of the main concepts for OpenVINO™ API 2.0 is being "easy to use", which includes: + * Simplification of migration from different frameworks to OpenVINO. -* Organization of OpenVINO. +* Organization of OpenVINO. * Usage of development tools. * Development and deployment of OpenVINO-based applications. + To accomplish that, the 2022.1 release OpenVINO introduced significant changes to the installation and deployment processes. 
This guide will walk you through these changes.

-## The Installer Package Contains OpenVINO™ Runtime Only
+The Installer Package Contains OpenVINO™ Runtime Only
+#####################################################

-Since OpenVINO 2022.1, development tools have been distributed only via [PyPI](https://pypi.org/project/openvino-dev/), and are no longer included in the OpenVINO installer package. For a list of these components, refer to the [installation overview](../../install_guides/installing-openvino-overview.md) guide. Benefits of this approach include:
+Since OpenVINO 2022.1, development tools have been distributed only via `PyPI <https://pypi.org/project/openvino-dev/>`__, and are no longer included in the OpenVINO installer package. For a list of these components, refer to the :doc:`installation overview ` guide. Benefits of this approach include:

-* simplification of the user experience - in previous versions, installation and usage of OpenVINO Development Tools differed from one distribution type to another (the OpenVINO installer vs. PyPI),
+* simplification of the user experience - in previous versions, installation and usage of OpenVINO Development Tools differed from one distribution type to another (the OpenVINO installer vs. PyPI),
* ensuring that dependencies are handled properly via the PIP package manager, and supporting virtual environments for development tools.

The structure of the OpenVINO 2022.1 installer package has been organized as follows:

-- The `runtime` folder includes headers, libraries and CMake interfaces.
-- The `tools` folder contains [the compile tool](../../../tools/compile_tool/README.md), [deployment manager](../../OV_Runtime_UG/deployment/deployment-manager-tool.md), and a set of `requirements.txt` files with links to the corresponding versions of the `openvino-dev` package.
-- The `python` folder contains the Python version for OpenVINO Runtime.
+* The ``runtime`` folder includes headers, libraries and CMake interfaces.
+* The ``tools`` folder contains :doc:`the compile tool `, :doc:`deployment manager `, and a set of ``requirements.txt`` files with links to the corresponding versions of the ``openvino-dev`` package.
+* The ``python`` folder contains the Python version for OpenVINO Runtime.

-## Installing OpenVINO Development Tools via PyPI
+Installing OpenVINO Development Tools via PyPI
+##############################################

Since OpenVINO Development Tools is no longer in the installer package, the installation process has also changed. This section describes it through a comparison with previous versions.

-### For Versions Prior to 2022.1
+For Versions Prior to 2022.1
+++++++++++++++++++++++++++++

+In previous versions, OpenVINO Development Tools was a part of the main package. After the package was installed, to convert models (for example, TensorFlow), you needed to install additional dependencies by using the requirement files, such as ``requirements_tf.txt``, install Post-Training Optimization tool and Accuracy Checker tool via the ``setup.py`` scripts, and then use the ``setupvars`` scripts to make the tools available to the following command:

-In previous versions, OpenVINO Development Tools was a part of the main package.
After the package was installed, to convert models (for example, TensorFlow), you needed to install additional dependencies by using the requirement files, such as `requirements_tf.txt`, install Post-Training Optimization tool and Accuracy Checker tool via the `setup.py` scripts, and then use the `setupvars` scripts to make the tools available to the following command: +.. code-block:: sh -```sh -$ mo.py -h -``` + $ mo.py -h -### For 2022.1 and After -In OpenVINO 2022.1 and later, you can install the development tools only from a [PyPI](https://pypi.org/project/openvino-dev/) repository, using the following command (taking TensorFlow as an example): +For 2022.1 and After +++++++++++++++++++++ -```sh -$ python3 -m pip install -r /tools/requirements_tf.txt -``` +In OpenVINO 2022.1 and later, you can install the development tools only from a `PyPI `__ repository, using the following command (taking TensorFlow as an example): -This will install all the development tools and additional components necessary to work with TensorFlow via the `openvino-dev` package (see **Step 4. Install the Package** on the [PyPI page](https://pypi.org/project/openvino-dev/) for parameters of other frameworks). +.. code-block:: sh + + $ python3 -m pip install -r /tools/requirements_tf.txt + + +This will install all the development tools and additional components necessary to work with TensorFlow via the ``openvino-dev`` package (see **Step 4. Install the Package** on the `PyPI page `__ for parameters of other frameworks). Then, the tools can be used by commands like: -```sh -$ mo -h -$ pot -h -``` +.. code-block:: sh + + $ mo -h + $ pot -h -Installation of any other dependencies is not required. For more details on the installation steps, see the [Install OpenVINO Development Tools](../../install_guides/installing-model-dev-tools.md). -## Interface Changes for Building C/C++ Applications +Installation of any other dependencies is not required. For more details on the installation steps, see the :doc:`Install OpenVINO Development Tools `. + +Interface Changes for Building C/C++ Applications +################################################# The new OpenVINO Runtime with its API 2.0 has also brought some changes for building C/C++ applications. -### CMake Interface +CMake Interface +++++++++++++++++++++ The CMake interface has been changed as follows: **With Inference Engine of previous versions**: -```cmake -find_package(InferenceEngine REQUIRED) -find_package(ngraph REQUIRED) -add_executable(ie_ngraph_app main.cpp) -target_link_libraries(ie_ngraph_app PRIVATE ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES}) -``` +.. code-block:: cmake + + find_package(InferenceEngine REQUIRED) + find_package(ngraph REQUIRED) + add_executable(ie_ngraph_app main.cpp) + target_link_libraries(ie_ngraph_app PRIVATE ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES}) + **With OpenVINO Runtime 2022.1 (API 2.0)**: -```cmake -find_package(OpenVINO REQUIRED) -add_executable(ov_app main.cpp) -target_link_libraries(ov_app PRIVATE openvino::runtime) +.. 
code-block:: cmake + + find_package(OpenVINO REQUIRED) + add_executable(ov_app main.cpp) + target_link_libraries(ov_app PRIVATE openvino::runtime) -add_executable(ov_c_app main.c) -target_link_libraries(ov_c_app PRIVATE openvino::runtime::c) -``` + add_executable(ov_c_app main.c) + target_link_libraries(ov_c_app PRIVATE openvino::runtime::c) -### Native Interfaces + +Native Interfaces +++++++++++++++++++++ It is possible to build applications without the CMake interface by using: MSVC IDE, UNIX makefiles, and any other interface, which has been changed as shown here: **With Inference Engine of previous versions**: -@sphinxdirective - .. tab:: Include dirs - .. code-block:: sh - - /deployment_tools/inference_engine/include - /deployment_tools/ngraph/include + .. code-block:: sh + + /deployment_tools/inference_engine/include + /deployment_tools/ngraph/include .. tab:: Path to libs - .. code-block:: sh + .. code-block:: sh - /deployment_tools/inference_engine/lib/intel64/Release - /deployment_tools/ngraph/lib/ + /deployment_tools/inference_engine/lib/intel64/Release + /deployment_tools/ngraph/lib/ .. tab:: Shared libs - .. code-block:: sh + .. code-block:: sh - // UNIX systems - inference_engine.so ngraph.so + // UNIX systems + inference_engine.so ngraph.so - // Windows - inference_engine.dll ngraph.dll + // Windows + inference_engine.dll ngraph.dll .. tab:: (Windows) .lib files - .. code-block:: sh - - ngraph.lib - inference_engine.lib + .. code-block:: sh -@endsphinxdirective + ngraph.lib + inference_engine.lib **With OpenVINO Runtime 2022.1 (API 2.0)**: -@sphinxdirective - .. tab:: Include dirs - .. code-block:: sh + .. code-block:: sh - /runtime/include + /runtime/include .. tab:: Path to libs - .. code-block:: sh + .. code-block:: sh - /runtime/lib/intel64/Release + /runtime/lib/intel64/Release .. tab:: Shared libs - .. code-block:: sh + .. code-block:: sh - // UNIX systems - openvino.so + // UNIX systems + openvino.so - // Windows - openvino.dll + // Windows + openvino.dll .. tab:: (Windows) .lib files @@ -153,49 +163,55 @@ It is possible to build applications without the CMake interface by using: MSVC openvino.lib -@endsphinxdirective -## Clearer Library Structure for Deployment +Clearer Library Structure for Deployment +######################################## -OpenVINO 2022.1 introduced a reorganization of the libraries, to make deployment easier. In the previous versions, it was required to use several libraries to perform deployment steps. Now you can just use `openvino` or `openvino_c` based on your developing language, with the necessary plugins to complete your task. For example, `openvino_intel_cpu_plugin` and `openvino_ir_frontend` plugins will enable loading OpenVINO IRs and performing inference on the CPU device (for more details, see the [Local distribution with OpenVINO](../deployment/local-distribution.md)). +OpenVINO 2022.1 introduced a reorganization of the libraries, to make deployment easier. In the previous versions, it was required to use several libraries to perform deployment steps. Now you can just use ``openvino`` or ``openvino_c`` based on your developing language, with the necessary plugins to complete your task. For example, ``openvino_intel_cpu_plugin`` and ``openvino_ir_frontend`` plugins will enable loading OpenVINO IRs and performing inference on the CPU device (for more details, see the :doc:`Local distribution with OpenVINO `). 
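As a minimal sketch of what this reorganization means for an application (the model path and the ``CPU`` device below are placeholder choices, not text from the migrated guide), linking the single ``openvino::runtime`` target is enough; the frontend and plugin libraries are loaded on demand:

.. code-block:: cpp

   #include <openvino/openvino.hpp>

   int main() {
       ov::Core core;                                           // served by the single `openvino` library
       auto model = core.read_model("model.xml");               // delegated to openvino_ir_frontend
       auto compiled_model = core.compile_model(model, "CPU");  // dispatched to openvino_intel_cpu_plugin
       compiled_model.create_infer_request().infer();           // inference itself needs no extra libraries
       return 0;
   }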
Below are detailed comparisons of the library structure between OpenVINO 2022.1 and the previous versions: -* Starting with 2022.1 release, a single core library with all the functionalities (`openvino` for C++ Runtime, `openvino_c` for Inference Engine API C interface) is used, instead of the previous core libraries which contained `inference_engine`, `ngraph`, `inference_engine_transformations` and `inference_engine_lp_transformations`. -* The optional `inference_engine_preproc` preprocessing library (if `InferenceEngine::PreProcessInfo::setColorFormat` or `InferenceEngine::PreProcessInfo::setResizeAlgorithm` is used) has been renamed to `openvino_gapi_preproc` and deprecated in 2022.1. For more details, see the [Preprocessing capabilities of OpenVINO API 2.0](preprocessing.md). +* Starting with 2022.1 release, a single core library with all the functionalities (``openvino`` for C++ Runtime, ``openvino_c`` for Inference Engine API C interface) is used, instead of the previous core libraries which contained ``inference_engine``, ``ngraph``, ``inference_engine_transformations`` and ``inference_engine_lp_transformations``. +* The optional ``inference_engine_preproc`` preprocessing library (if `InferenceEngine::PreProcessInfo::setColorFormat `__ or `InferenceEngine::PreProcessInfo::setResizeAlgorithm `__ is used) has been renamed to ``openvino_gapi_preproc`` and deprecated in 2022.1. For more details, see the :doc:`Preprocessing capabilities of OpenVINO API 2.0 `. + * The libraries of plugins have been renamed as follows: - * `openvino_intel_cpu_plugin` is used for [CPU](../supported_plugins/CPU.md) device instead of `MKLDNNPlugin`. - * `openvino_intel_gpu_plugin` is used for [GPU](../supported_plugins/GPU.md) device instead of `clDNNPlugin`. - * `openvino_auto_plugin` is used for [Auto-Device Plugin](../auto_device_selection.md). + + * ``openvino_intel_cpu_plugin`` is used for :doc:`CPU ` device instead of ``MKLDNNPlugin``. + * ``openvino_intel_gpu_plugin`` is used for :doc:`GPU ` device instead of ``clDNNPlugin``. + * ``openvino_auto_plugin`` is used for :doc:`Auto-Device Plugin `. + * The plugins for reading and converting models have been changed as follows: - * `openvino_ir_frontend` is used to read IRs instead of `inference_engine_ir_reader`. - * `openvino_onnx_frontend` is used to read ONNX models instead of `inference_engine_onnx_reader` (with its dependencies). - * `openvino_paddle_frontend` is added in 2022.1 to read PaddlePaddle models. + + * ``openvino_ir_frontend`` is used to read IRs instead of ``inference_engine_ir_reader``. + * ``openvino_onnx_frontend`` is used to read ONNX models instead of ``inference_engine_onnx_reader`` (with its dependencies). + * ``openvino_paddle_frontend`` is added in 2022.1 to read PaddlePaddle models. + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md index 290610326492a8..80438604a56eb1 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md @@ -12,83 +12,101 @@ openvino_2_0_preprocessing openvino_2_0_model_creation -@endsphinxdirective -This guide introduces the new OpenVINO™ API: API 2.0, as well as the new OpenVINO IR model format: IR v11. Here, you will find comparisons of their "old" and "new" versions. +This guide introduces the new OpenVINO™ API: API 2.0, as well as the new OpenVINO IR model format: IR v11. Here, you will find comparisons of their "old" and "new" versions. 
-### Introduction of API 2.0
+Introduction of API 2.0
+#######################

Versions of OpenVINO prior to 2022.1 required changes in the application logic when migrating an app from other frameworks, such as TensorFlow, ONNX Runtime, PyTorch, PaddlePaddle, etc. The changes were required because:

-- Model Optimizer changed input precisions for some inputs. For example, neural language processing models with `I64` inputs were changed to include `I32` ones.
-- Model Optimizer changed layouts for TensorFlow models (see the [Layouts in OpenVINO](../layout_overview.md)). It lead to unusual requirement of using the input data with a different layout than that of the framework:
-![tf_openvino]
-- Inference Engine API (`InferenceEngine::CNNNetwork`) applied some conversion rules for input and output precisions due to limitations in device plugins.
+- Model Optimizer changed input precisions for some inputs. For example, neural language processing models with ``I64`` inputs were changed to include ``I32`` ones.
+- Model Optimizer changed layouts for TensorFlow models (see the :doc:`Layouts in OpenVINO `). It led to an unusual requirement of using the input data with a different layout than that of the framework:
+
+.. image:: _static/images/tf_openvino.svg
+   :alt: tf_openvino
+
+- Inference Engine API (`InferenceEngine::CNNNetwork `__) applied some conversion rules for input and output precisions due to limitations in device plugins.
- Users needed to specify input shapes during model conversions in Model Optimizer, and work with static shapes in the application.

-OpenVINO™ 2022.1 has introduced API 2.0 (also called OpenVINO API v2) to align the logic of working with models as it is done in their origin frameworks - no layout and precision changes, operating with tensor names and indices to address inputs and outputs. OpenVINO Runtime has combined Inference Engine API used for inference and nGraph API targeted to work with models and operations. API 2.0 has a common structure, naming convention styles, namespaces, and removes duplicated structures. For more details, see the [Changes to Inference Pipeline in OpenVINO API v2](common_inference_pipeline.md).
+OpenVINO™ 2022.1 has introduced API 2.0 (also called OpenVINO API v2) to align the logic of working with models as it is done in their original frameworks - no layout and precision changes, operating with tensor names and indices to address inputs and outputs. OpenVINO Runtime has combined Inference Engine API used for inference and nGraph API targeted to work with models and operations. API 2.0 has a common structure, naming convention styles, namespaces, and removes duplicated structures. For more details, see the :doc:`Changes to Inference Pipeline in OpenVINO API v2 `.
+
+.. note::
+
+   Your existing applications will continue to work with OpenVINO Runtime 2022.1, as normal. However, migration to API 2.0 is strongly recommended. This will allow you to use additional features, such as :doc:`Preprocessing ` and :doc:`Dynamic shapes support `.

-> **NOTE**: Your existing applications will continue to work with OpenVINO Runtime 2022.1, as normal. Although, migration to API 2.0 is strongly recommended. This will allow you to use additional features, such as [Preprocessing](../preprocessing_overview.md) and [Dynamic shapes support](../ov_dynamic_shapes.md).
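As a rough sketch of the addressing change described above (the tensor name ``data`` is hypothetical, not taken from the guide), API 2.0 reaches inputs through tensor names or positional indices instead of operation names:

.. code-block:: cpp

   #include <openvino/openvino.hpp>

   void fill_input(ov::InferRequest& request) {
       // API 2.0: tensors are addressed by the names preserved from the
       // original framework model, or simply by positional index.
       ov::Tensor by_name  = request.get_tensor("data");   // hypothetical tensor name
       ov::Tensor by_index = request.get_input_tensor(0);  // equivalent positional access
   }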
-### The New OpenVINO IR v11 +The New OpenVINO IR v11 +####################### -To support these features, OpenVINO has introduced OpenVINO IR v11, which is now the default version for Model Optimizer. The model represented in OpenVINO IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. It is also not required to specify input shapes during conversion, which results in OpenVINO IR v11 containing `-1` to denote undefined dimensions. For more details on how to fully utilize this feature, see [Working with dynamic shapes](../ov_dynamic_shapes.md). For information on how to reshape to static shapes in application, see [Changing input shapes](../ShapeInference.md). +To support these features, OpenVINO has introduced OpenVINO IR v11, which is now the default version for Model Optimizer. The model represented in OpenVINO IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. It is also not required to specify input shapes during conversion, which results in OpenVINO IR v11 containing ``-1`` to denote undefined dimensions. For more details on how to fully utilize this feature, see :doc:`Working with dynamic shapes `. For information on how to reshape to static shapes in application, see :doc:`Changing input shapes `. OpenVINO IR v11 is fully compatible with applications written with the Inference Engine API used by older versions of OpenVINO. This backward compatibility is allowed thanks to additional runtime information included in OpenVINO IR v11. This means that when OpenVINO IR v11 is read by an application based on Inference Engine, it is internally converted to OpenVINO IR v10. OpenVINO IR v11 is supported by all OpenVINO Development tools including Post-Training Optimization Tool, Benchmark app, etc. -### Backward Compatibility for OpenVINO IR v10 +Backward Compatibility for OpenVINO IR v10 +########################################## -API 2.0 also supports backward compatibility for models of OpenVINO IR v10. If you have OpenVINO IR v10 files, they can also be fed to OpenVINO Runtime. For more details, see the [migration steps](common_inference_pipeline.md). +API 2.0 also supports backward compatibility for models of OpenVINO IR v10. If you have OpenVINO IR v10 files, they can also be fed to OpenVINO Runtime. For more details, see the :doc:`migration steps `. Some of the OpenVINO Development Tools also support both OpenVINO IR v10 and v11 as an input: -- Accuracy checker uses API 2.0 for model accuracy measurement by default. It also supports switching to the old API by using the `--use_new_api False` command-line parameter. Both launchers accept OpenVINO IR v10 and v11, but in some cases configuration files should be updated. For more details, see the [Accuracy Checker documentation](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/openvino_launcher_readme.md). -- [Compile tool](../../../tools/compile_tool/README.md) compiles the model to be used in API 2.0 by default. To use the resulting compiled blob under the Inference Engine API, the additional `ov_api_1_0` option should be passed. -However, Post-Training Optimization Tool of OpenVINO 2022.1 does not support OpenVINO IR v10. They require the latest version of Model Optimizer to generate OpenVINO IR v11 files. +- Accuracy checker uses API 2.0 for model accuracy measurement by default. 
It also supports switching to the old API by using the ``--use_new_api False`` command-line parameter. Both launchers accept OpenVINO IR v10 and v11, but in some cases configuration files should be updated. For more details, see the `Accuracy Checker documentation <https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/openvino_launcher_readme.md>`__.
+- :doc:`Compile tool ` compiles the model to be used in API 2.0 by default. To use the resulting compiled blob under the Inference Engine API, the additional ``ov_api_1_0`` option should be passed.
+
+However, Post-Training Optimization Tool of OpenVINO 2022.1 does not support OpenVINO IR v10. It requires the latest version of Model Optimizer to generate OpenVINO IR v11 files.

-> **NOTE**: To quantize your OpenVINO IR v10 models to run with OpenVINO 2022.1, download and use Post-Training Optimization Tool of OpenVINO 2021.4.
-@sphinxdirective
+.. note::
-.. _differences_api20_ie:
+
+   To quantize your OpenVINO IR v10 models to run with OpenVINO 2022.1, download and use Post-Training Optimization Tool of OpenVINO 2021.4.
-@endsphinxdirective
+
+.. _differences_api20_ie:

-### Differences in API 2.0 and Inference Engine API Behaviors
+Differences in API 2.0 and Inference Engine API Behaviors
+#########################################################

Inference Engine and nGraph APIs do not become deprecated with the introduction of the new API, and they can still be used in applications. However, it is highly recommended to migrate to API 2.0, as it offers more features (further extended in future releases), such as:
+
-- [Working with dynamic shapes](../ov_dynamic_shapes.md), which increases performance when working with compatible models such as NLP (Neural Language Processing) and super-resolution models.
-- [Preprocessing of the model](../preprocessing_overview.md), which adds preprocessing operations to inference models and fully occupies the accelerator, freeing CPU resources.
+- :doc:`Working with dynamic shapes `, which increases performance when working with compatible models such as NLP (Neural Language Processing) and super-resolution models.
+- :doc:`Preprocessing of the model `, which adds preprocessing operations to inference models and fully occupies the accelerator, freeing CPU resources.

To understand the differences between Inference Engine API and API 2.0, see the definitions of two types of behaviors first:
+
- **Old behavior** of OpenVINO assumes that:
+
  - Model Optimizer can change input element types and order of dimensions (layouts) for the model from the original framework.
  - Inference Engine can override input and output element types.
-  - Inference Engine API uses operation names to address inputs and outputs (e.g. InferenceEngine::InferRequest::GetBlob).
+  - Inference Engine API uses operation names to address inputs and outputs (e.g. `InferenceEngine::InferRequest::GetBlob `__).
  - Inference Engine API does not support compiling of models with dynamic input shapes.
+
- **New behavior** implemented in 2022.1 assumes full model alignment with the framework:
+
  - Model Optimizer preserves input element types and order of dimensions (layouts), and stores tensor names from the original models.
-  - OpenVINO Runtime 2022.1 reads models in any format (OpenVINO IR v10, OpenVINO IR v11, TensorFlow (check [TensorFlow Frontend Capabilities and Limitations](../../resources/tensorflow_frontend.md)), ONNX, PaddlePaddle, etc.).
+  - OpenVINO Runtime 2022.1 reads models in any format (OpenVINO IR v10, OpenVINO IR v11, TensorFlow (check :doc:`TensorFlow Frontend Capabilities and Limitations `), ONNX, PaddlePaddle, etc.).
- API 2.0 uses tensor names for addressing, which is the standard approach among the compatible model frameworks.
- API 2.0 can also address input and output tensors by the index. Some model formats like ONNX are sensitive to the input and output order, which is preserved by OpenVINO 2022.1.

The table below demonstrates which behavior, **old** or **new**, is used for models based on the two APIs.

-| API                           | OpenVINO IR v10  | OpenVINO IR v11  | ONNX Files | Models Created in Code |
-|-------------------------------|------------------|------------------|------------|------------------------|
-|Inference Engine / nGraph APIs | Old              | Old              | Old        | Old                    |
-|API 2.0                        | Old              | New              | New        | New                    |
+--------------------------------+-----------------+-----------------+-----------------+------------------------+
| API                            | OpenVINO IR v10 | OpenVINO IR v11 | ONNX Files      | Models Created in Code |
+================================+=================+=================+=================+========================+
| Inference Engine / nGraph APIs | Old             | Old             | Old             | Old                    |
+--------------------------------+-----------------+-----------------+-----------------+------------------------+
| API 2.0                        | Old             | New             | New             | New                    |
+--------------------------------+-----------------+-----------------+-----------------+------------------------+

-### More Information
+More Information
+####################

See the following pages to understand how to migrate Inference Engine-based applications to API 2.0:
- - [Installation & Deployment](deployment_migration.md)
- - [OpenVINO™ Common Inference pipeline](common_inference_pipeline.md)
- - [Preprocess your model](./preprocessing.md)
- - [Configure device](./configure_devices.md)
- - [OpenVINO™ Model Creation](graph_construction.md)
-[tf_openvino]: ../../img/tf_openvino.svg
+- :doc:`Installation & Deployment `
+- :doc:`OpenVINO™ Common Inference pipeline `
+- :doc:`Preprocess your model `
+- :doc:`Configure device `
+- :doc:`OpenVINO™ Model Creation `
+
+@endsphinxdirective
diff --git a/docs/img/tf_openvino.svg b/docs/_static/images/tf_openvino.svg
similarity index 100%
rename from docs/img/tf_openvino.svg
rename to docs/_static/images/tf_openvino.svg

From de0a4e16fbb9baf88f93a72dbcedff348d27b695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?=
Date: Thu, 23 Mar 2023 16:33:54 +0100
Subject: [PATCH 063/296] TopK 11 exposed to Python (#16501)

---
 .../compatibility/ngraph/opset11/__init__.py  |  2 +-
 .../src/compatibility/ngraph/opset11/ops.py   | 32 ++++++++++++++++++-
 .../src/openvino/runtime/opset11/__init__.py  |  2 +-
 .../src/openvino/runtime/opset11/ops.py       | 30 +++++++++++++++++
 .../python/tests/test_graph/test_create_op.py | 13 ++++++++
 .../test_ngraph/test_create_op.py             | 13 ++++++++
 6 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
index 91f84b81f415cd..047c93e4cc03d3 100644
--- a/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
+++ b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
@@ -170,7 +170,7 @@ from ngraph.opset1.ops import tanh
 from ngraph.opset1.ops import tensor_iterator
 from ngraph.opset1.ops import tile
-from ngraph.opset3.ops import topk
+from ngraph.opset11.ops import topk
 from ngraph.opset1.ops import transpose
 from ngraph.opset10.ops import unique
 from ngraph.opset1.ops import unsqueeze
diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/ops.py
b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py index 434b778b246cf8..3a4b54059ca6fc 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset11/ops.py +++ b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py @@ -34,7 +34,7 @@ def interpolate( axes: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """Perfors the interpolation of the input tensor. + """Performs the interpolation of the input tensor. :param image: The node providing input tensor with data for interpolation. :param scales_or_sizes: @@ -75,3 +75,33 @@ def interpolate( inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) return _get_node_factory_opset11().create("Interpolate", inputs, attrs) + + +@nameable_op +def topk( + data: NodeInput, + k: NodeInput, + axis: int, + mode: str, + sort: str, + index_element_type: str = "i32", + stable: bool = False, + name: Optional[str] = None, +) -> Node: + """Return a node which performs TopK. + + :param data: Input data. + :param k: K. + :param axis: TopK Axis. + :param mode: Compute TopK largest ('max') or smallest ('min') + :param sort: Order of output elements (sort by: 'none', 'index' or 'value') + :param index_element_type: Type of output tensor with indices. + :param stable: Specifies whether the equivalent elements should maintain + their relative order from the input tensor during sorting. + :return: The new node which performs TopK + """ + return _get_node_factory_opset11().create( + "TopK", + as_nodes(data, k), + {"axis": axis, "mode": mode, "sort": sort, "index_element_type": index_element_type, "stable": stable}, + ) diff --git a/src/bindings/python/src/openvino/runtime/opset11/__init__.py b/src/bindings/python/src/openvino/runtime/opset11/__init__.py index 79c7068bf83d87..3e867f548699b0 100644 --- a/src/bindings/python/src/openvino/runtime/opset11/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset11/__init__.py @@ -171,7 +171,7 @@ from openvino.runtime.opset1.ops import tanh from openvino.runtime.opset1.ops import tensor_iterator from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk +from openvino.runtime.opset11.ops import topk from openvino.runtime.opset1.ops import transpose from openvino.runtime.opset10.ops import unique from openvino.runtime.opset1.ops import unsqueeze diff --git a/src/bindings/python/src/openvino/runtime/opset11/ops.py b/src/bindings/python/src/openvino/runtime/opset11/ops.py index 2a54db0069ebd1..235b0e0ef37af5 100644 --- a/src/bindings/python/src/openvino/runtime/opset11/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset11/ops.py @@ -75,3 +75,33 @@ def interpolate( inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) return _get_node_factory_opset11().create("Interpolate", inputs, attrs) + + +@nameable_op +def topk( + data: NodeInput, + k: NodeInput, + axis: int, + mode: str, + sort: str, + index_element_type: str = "i32", + stable: bool = False, + name: Optional[str] = None, +) -> Node: + """Return a node which performs TopK. + + :param data: Input data. + :param k: K. + :param axis: TopK Axis. + :param mode: Compute TopK largest ('max') or smallest ('min') + :param sort: Order of output elements (sort by: 'none', 'index' or 'value') + :param index_element_type: Type of output tensor with indices. + :param stable: Specifies whether the equivalent elements should maintain + their relative order from the input tensor during sorting. 
+ :return: The new node which performs TopK + """ + return _get_node_factory_opset11().create( + "TopK", + as_nodes(data, k), + {"axis": axis, "mode": mode, "sort": sort, "index_element_type": index_element_type, "stable": stable}, + ) diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index f76ed01641a6d5..da8cfca15c1c51 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -2300,3 +2300,16 @@ def test_unique_opset10(): assert node.get_output_element_type(1) == Type.i64 assert node.get_output_element_type(2) == Type.i64 assert node.get_output_element_type(3) == Type.i64 + + +def test_topk_opset11(): + data_shape = [1, 3, 256] + data = ov.parameter(data_shape, dtype=np.int32, name="Data") + k_val = np.int32(3) + axis = np.int32(-1) + node = ov.topk(data, k_val, axis, "min", "value", stable=True) + + assert node.get_type_name() == "TopK" + assert node.get_output_size() == 2 + assert list(node.get_output_shape(0)) == [1, 3, 3] + assert list(node.get_output_shape(1)) == [1, 3, 3] diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py index 09fda90564bd01..7ec5a26109ab49 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py @@ -2412,3 +2412,16 @@ def test_unique_opset10(): assert node.get_output_element_type(1) == Type.i64 assert node.get_output_element_type(2) == Type.i64 assert node.get_output_element_type(3) == Type.i64 + + +def test_topk_opset11(): + data_shape = [1, 3, 256] + data = ng.parameter(data_shape, dtype=np.int32, name="Data") + k_val = np.int32(3) + axis = np.int32(-1) + node = ng_opset11.topk(data, k_val, axis, "min", "value", stable=True) + + assert node.get_type_name() == "TopK" + assert node.get_output_size() == 2 + assert list(node.get_output_shape(0)) == [1, 3, 3] + assert list(node.get_output_shape(1)) == [1, 3, 3] From 2755b32fb917696cdfd265dee4a616b0db4cff5e Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 23 Mar 2023 19:34:49 +0400 Subject: [PATCH 064/296] Changed Template plugin public property (#16496) * Changed template plugin public property * Add property documentation * Fixed comments * Fixed typo --- docs/IE_PLUGIN_DG/Intro.md | 7 +- docs/IE_PLUGIN_DG/Plugin.md | 1 + docs/IE_PLUGIN_DG/Properties.md | 10 + docs/IE_PLUGIN_DG/layout.xml | 1 + .../template/{config.hpp => properties.hpp} | 12 +- src/plugins/template/src/compiled_model.cpp | 7 +- src/plugins/template/src/config.cpp | 11 +- src/plugins/template/src/config.hpp | 1 + src/plugins/template/src/plugin.cpp | 7 +- .../behavior/plugin/configuration_tests.cpp | 10 +- .../disable_transformations_test.cpp | 51 +++++ .../transformations/preprocessing.cpp | 183 ------------------ .../template_transformations_test.cpp | 58 ------ 13 files changed, 95 insertions(+), 264 deletions(-) create mode 100644 docs/IE_PLUGIN_DG/Properties.md rename src/plugins/template/include/template/{config.hpp => properties.hpp} (58%) create mode 100644 src/plugins/template/tests/functional/transformations/disable_transformations_test.cpp delete mode 100644 src/plugins/template/tests/functional/transformations/preprocessing.cpp delete mode 100644 src/plugins/template/tests/functional/transformations/template_transformations_test.cpp diff --git a/docs/IE_PLUGIN_DG/Intro.md 
b/docs/IE_PLUGIN_DG/Intro.md index ed3d101ea4a6bc..8334f2db744714 100644 --- a/docs/IE_PLUGIN_DG/Intro.md +++ b/docs/IE_PLUGIN_DG/Intro.md @@ -11,6 +11,7 @@ Implement Compiled Model Functionality Implement Synchronous Inference Request Implement Asynchronous Inference Request + Provide Plugin Specific Properties Implement Remote Context Implement Remote Tensor openvino_docs_ov_plugin_dg_plugin_build @@ -45,9 +46,11 @@ OpenVINO plugin dynamic library consists of several main components: - Can extract performance counters for an inference pipeline execution profiling. 4. [Asynchronous Inference Request class](@ref openvino_docs_ov_plugin_dg_async_infer_request): - Wraps the [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) class and runs pipeline stages in parallel on several task executors based on a device-specific pipeline structure. -5. [Remote Context](@ref openvino_docs_ov_plugin_dg_remote_context): +5. [Plugin specific properties](@ref openvino_docs_ov_plugin_dg_properties): + - Provides the plugin specific properties. +6. [Remote Context](@ref openvino_docs_ov_plugin_dg_remote_context): - Provides the device specific remote context. Context allows to create remote tensors. -6. [Remote Tensor](@ref openvino_docs_ov_plugin_dg_remote_tensor) +7. [Remote Tensor](@ref openvino_docs_ov_plugin_dg_remote_tensor) - Provides the device specific remote tensor API and implementation. > **NOTE**: This documentation is written based on the `Template` plugin, which demonstrates plugin diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index 96326fabcb574a..124852d6cf6d79 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -42,6 +42,7 @@ As an example, a plugin configuration has three value parameters: - `perf_counts` - boolean value to identify whether to collect performance counters during [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) execution. - `streams_executor_config` - configuration of `ov::threading::IStreamsExecutor` to handle settings of multi-threaded context. - `performance_mode` - configuration of `ov::hint::PerformanceMode` to set the performance mode. +- `disable_transformations` - allows to disable transformations which are applied in the process of model compilation. ### Plugin Constructor diff --git a/docs/IE_PLUGIN_DG/Properties.md b/docs/IE_PLUGIN_DG/Properties.md new file mode 100644 index 00000000000000..a8459181e74c42 --- /dev/null +++ b/docs/IE_PLUGIN_DG/Properties.md @@ -0,0 +1,10 @@ +# Plugin Properties {#openvino_docs_ov_plugin_dg_properties} + +Plugin can provide own device specific properties. + +Property Class +------------------------ + +OpenVINO API provides the interface ov::Property which allows to define the property and access rights. 
Based on that, a declaration of plugin specific properties can look as follows: + +@snippet include/template/properties.hpp properties:public_header diff --git a/docs/IE_PLUGIN_DG/layout.xml b/docs/IE_PLUGIN_DG/layout.xml index 44137896ee794b..dbd424edc2c8dd 100644 --- a/docs/IE_PLUGIN_DG/layout.xml +++ b/docs/IE_PLUGIN_DG/layout.xml @@ -79,6 +79,7 @@ + diff --git a/src/plugins/template/include/template/config.hpp b/src/plugins/template/include/template/properties.hpp similarity index 58% rename from src/plugins/template/include/template/config.hpp rename to src/plugins/template/include/template/properties.hpp index 8b0267c13744ef..f00e6e1474fda4 100644 --- a/src/plugins/template/include/template/config.hpp +++ b/src/plugins/template/include/template/properties.hpp @@ -3,10 +3,10 @@ // /** - * @brief A header that defines advanced related properties for DLIA plugins. + * @brief A header that defines advanced related properties for Template plugin. * These properties should be used in set_property() and compile_model() methods of plugins * - * @file template/config.hpp + * @file template/properties.hpp */ #pragma once @@ -18,14 +18,14 @@ namespace ov { namespace template_plugin { -// ! [public_header:properties] +// ! [properties:public_header] /** - * @brief Defines the number of throutput streams used by TEMPLATE plugin. + * @brief Allows to disable all transformations for execution inside the TEMPLATE plugin. */ -static constexpr Property throughput_streams{"THROUGHPUT_STREAMS"}; +static constexpr Property disable_transformations{"DISABLE_TRANSFORMATIONS"}; -// ! [public_header:properties] +// ! [properties:public_header] } // namespace template_plugin } // namespace ov diff --git a/src/plugins/template/src/compiled_model.cpp b/src/plugins/template/src/compiled_model.cpp index 3bdd06163040de..5961280c12aa29 100644 --- a/src/plugins/template/src/compiled_model.cpp +++ b/src/plugins/template/src/compiled_model.cpp @@ -12,7 +12,6 @@ #include "itt.hpp" #include "openvino/runtime/properties.hpp" #include "plugin.hpp" -#include "template/config.hpp" #include "transformations/utils/utils.hpp" // ! 
[compiled_model:ctor] @@ -47,6 +46,8 @@ ov::template_plugin::CompiledModel::CompiledModel(const std::shared_ptr& model); void ov::template_plugin::CompiledModel::compile_model(const std::shared_ptr& model) { + if (m_cfg.disable_transformations) + return; // apply plugins transformations transform_model(model); // Perform any other steps like allocation and filling backend specific memory handles and so on @@ -107,9 +108,7 @@ ov::Any ov::template_plugin::CompiledModel::get_property(const std::string& name return ro_properties; }; const auto& default_rw_properties = []() { - std::vector rw_properties{ov::device::id, - ov::enable_profiling, - ov::template_plugin::throughput_streams}; + std::vector rw_properties{ov::device::id, ov::enable_profiling}; return rw_properties; }; const auto& to_string_vector = [](const std::vector& properties) { diff --git a/src/plugins/template/src/config.cpp b/src/plugins/template/src/config.cpp index 90842ead1dfa75..2e2075d39bd3c3 100644 --- a/src/plugins/template/src/config.cpp +++ b/src/plugins/template/src/config.cpp @@ -7,7 +7,8 @@ #include #include -#include "template/config.hpp" +#include "openvino/runtime/properties.hpp" +#include "template/properties.hpp" using namespace ov::template_plugin; @@ -22,8 +23,8 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defa const auto& key = c.first; const auto& value = c.second; - if (ov::template_plugin::throughput_streams == key) { - streams_executor_config.set_property(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value); + if (ov::template_plugin::disable_transformations == key) { + disable_transformations = value.as(); } else if (streamExecutorConfigKeys.end() != std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { streams_executor_config.set_property(key, value); @@ -51,7 +52,9 @@ ov::Any Configuration::Get(const std::string& name) const { return {std::to_string(device_id)}; } else if (name == CONFIG_KEY(PERF_COUNT)) { return {perf_count}; - } else if (name == ov::template_plugin::throughput_streams || name == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) { + } else if (name == ov::template_plugin::disable_transformations) { + return {disable_transformations}; + } else if (name == ov::num_streams) { return {std::to_string(streams_executor_config._streams)}; } else if (name == CONFIG_KEY(CPU_BIND_THREAD)) { return streams_executor_config.get_property(name); diff --git a/src/plugins/template/src/config.hpp b/src/plugins/template/src/config.hpp index c8066a91ebdc0e..5a9732d382d5fe 100644 --- a/src/plugins/template/src/config.hpp +++ b/src/plugins/template/src/config.hpp @@ -34,6 +34,7 @@ struct Configuration { bool perf_count = true; ov::threading::IStreamsExecutor::Config streams_executor_config; ov::hint::PerformanceMode performance_mode = ov::hint::PerformanceMode::UNDEFINED; + bool disable_transformations = false; }; // ! 
[configuration:header] diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp index 25c44ed062d5d5..6747d88cb8eba5 100644 --- a/src/plugins/template/src/plugin.cpp +++ b/src/plugins/template/src/plugin.cpp @@ -12,7 +12,7 @@ #include "openvino/pass/manager.hpp" #include "openvino/runtime/properties.hpp" #include "remote_context.hpp" -#include "template/config.hpp" +#include "template/properties.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" #include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" #include "transformations/control_flow/unroll_if.hpp" @@ -171,6 +171,9 @@ ov::SupportedOpsMap ov::template_plugin::Plugin::query_model(const std::shared_p auto supported = ov::get_supported_nodes( model, [&](std::shared_ptr& model) { + // skip transformations in case of user config + if (fullConfig.disable_transformations) + return; // 1. It is needed to apply all transformations as it is done in compile_model transform_model(model); }, @@ -228,7 +231,7 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const std::vector rw_properties{ov::device::id, ov::enable_profiling, ov::hint::performance_mode, - ov::template_plugin::throughput_streams}; + ov::template_plugin::disable_transformations}; return rw_properties; }; const auto& to_string_vector = [](const std::vector& properties) { diff --git a/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp b/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp index c0a9a5d63f0af3..15d04bf9cb3abd 100644 --- a/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp +++ b/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp @@ -4,7 +4,7 @@ #include "behavior/plugin/configuration_tests.hpp" -#include