diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml
index fce8fdddcc4f91..aba7016ed65c93 100644
--- a/.ci/azure/linux_onnxruntime.yml
+++ b/.ci/azure/linux_onnxruntime.yml
@@ -112,7 +112,7 @@ jobs:
 
   - script: |
       source $(INSTALL_DIR)/bin/setupvars.sh
-      echo "2021.2" > $(INSTALL_DIR)/deployment_tools/inference_engine/version.txt
+      echo "2021.4" > $(INSTALL_DIR)/deployment_tools/inference_engine/version.txt
       CXXFLAGS="-Wno-error=deprecated-declarations" ./build.sh --config RelWithDebInfo --use_openvino CPU_FP32 --build_shared_lib --parallel --skip_tests --build_dir $(ONNXRUNTIME_BUILD_DIR)
     workingDirectory: $(ONNXRUNTIME_REPO_DIR)
     displayName: 'Build ONNX Runtime'
diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml
index 04d4c16ea23344..90fc812bbaa36c 100644
--- a/.ci/azure/mac.yml
+++ b/.ci/azure/mac.yml
@@ -87,9 +87,6 @@ jobs:
       export PATH="/usr/local/opt/cython/bin:$PATH"
       export CC=gcc
       export CXX=g++
-      # Disable errors with Ninja
-      export CXXFLAGS="-Wno-error=unused-command-line-argument"
-      export CFLAGS="-Wno-error=unused-command-line-argument"
       cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR)
     workingDirectory: $(BUILD_DIR)
     displayName: 'CMake'
diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md
new file mode 100644
index 00000000000000..f02d50499fd857
--- /dev/null
+++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md
@@ -0,0 +1,15 @@
+# Converting RetinaNet Model from TensorFlow* to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow}
+
+This tutorial explains how to convert a RetinaNet model to the Intermediate Representation (IR).
+
+The [public RetinaNet model](https://github.com/fizyr/keras-retinanet) does not contain pretrained TensorFlow\* weights.
+To convert this model to the TensorFlow\* format, you can use the [Reproduce Keras\* to TensorFlow\* Conversion tutorial](https://docs.openvinotoolkit.org/latest/omz_models_model_retinanet_tf.html).
+
+After you convert the model to the TensorFlow\* format, run the Model Optimizer command below:
+```sh
+python mo.py --input "input_1[1 1333 1333 3]" --input_model retinanet_resnet50_coco_best_v2.1.0.pb --data_type FP32 --transformations_config ./extensions/front/tf/retinanet.json
+```
+
+Here, the `transformations_config` command-line parameter specifies the configuration JSON file that contains model conversion hints for the Model Optimizer.
+The JSON file contains some parameters that need to be changed if you train the model yourself. It also contains information on how to match endpoints
+to replace the subgraph nodes. After the model is converted to IR, the output nodes will be replaced with a DetectionOutput layer.
diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml
index 19a87a1e11e97c..184c2ea11ecba6 100644
--- a/docs/doxygen/ie_docs.xml
+++ b/docs/doxygen/ie_docs.xml
@@ -34,6 +34,7 @@ limitations under the License.
                             <tab type="user" title="Converting DeepSpeech Model from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_DeepSpeech_From_Tensorflow"/>
                             <tab type="user" title="Converting Language Model on One Billion Word Benchmark from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_lm_1b_From_Tensorflow"/>
                             <tab type="user" title="Converting TensorFlow* Object Detection API Models" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models"/>
+                            <tab type="user" title="Converting RetinaNet Model from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow"/>
                             <tab type="user" title="Converting TensorFlow*-Slim Image Classification Model Library Models" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Slim_Library_Models"/>
                             <tab type="user" title="Converting CRNN Model from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_CRNN_From_Tensorflow"/>
                             <tab type="user" title="Converting GNMT from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_GNMT_From_Tensorflow"/>
@@ -176,6 +177,7 @@ limitations under the License.
                 <tab type="user" title="HSigmoid-5" url="@ref openvino_docs_ops_activation_HSigmoid_5"/>
                 <tab type="user" title="HSwish-4" url="@ref openvino_docs_ops_activation_HSwish_4"/>
                 <tab type="user" title="IDFT-7" url="@ref openvino_docs_ops_signals_IDFT_7"/>
+                <tab type="user" title="If-8" url="@ref openvino_docs_ops_condition_If_8"/>
                 <tab type="user" title="Interpolate-1" url="@ref openvino_docs_ops_image_Interpolate_1"/>
                 <tab type="user" title="Interpolate-4" url="@ref openvino_docs_ops_image_Interpolate_4"/>
                 <tab type="user" title="LRN-1" url="@ref openvino_docs_ops_normalization_LRN_1"/>
diff --git a/docs/ops/condition/If_8.md b/docs/ops/condition/If_8.md
new file mode 100644
index 00000000000000..7de2449b1eada1
--- /dev/null
+++ b/docs/ops/condition/If_8.md
@@ -0,0 +1,226 @@
+## If <a name="If"></a> {#openvino_docs_ops_condition_If_8}
+
+**Versioned name**: *If-8*
+
+**Category**: Infrastructure
+
+**Short description**: *If* operation contains two internal networks (subgraphs), `then_body` and `else_body`,
+and executes one of them depending on the `cond` value. If `cond` is `True`, the `then_body` subgraph is executed. If `cond` is `False`,
+the operation executes the `else_body` subgraph.
+
+**Detailed description**
+
+*If* must not contain empty subgraphs. Each of them must have at least one `Result` operation.
+Also, the number of outputs from *If* must always be greater than zero and equal to the number of outputs from each subgraph.
+
+**If attributes**:
+
+* **Subgraphs**:
+
+    `then_body`/`else_body` are subgraphs that are executed depending on the `cond` value. 
+    The subgraph is described operation by operation as a typical IR network. 
+    The subgraph has inputs (`Parameter` operations) and outputs (`Result` operations).
+    
+    * **Subgraph's inputs** - inputs to the subgraph that are associated with *If* inputs via *port_map*.
+    The subgraph can have any number of inputs (even zero).
+    
+    * **Subgraph's outputs** - outputs from the subgraph that are associated with *If* outputs via *port_map*.
+    The subgraph must contain at least one output. Each *If* output is associated with one output from the subgraph. 
+    Therefore the number of `then_body` outputs is equal to the number of outputs from *If* and 
+    the number of `else_body` outputs.
+    The type of the subgraph output and the type of the associated output from *If* must be equal.
+    
+
+* **Port maps**:
+    
+    *port_map* is a set of rules to map input or output data tensors of *If* operation onto the subgraph data tensors. 
+    The `port_map` entries can be `input` and `output`. Each entry describes a corresponding mapping rule. 
+    *If* has two *port_maps*: `then_port_map` for `then_body` and `else_port_map` for `else_body`.
+
+    * **Port map attributes**:
+
+        * *external_port_id*
+            * **Description**: *external_port_id* is a port ID of *If* operation.
+            * **Range of values**: IDs of the *If* inputs and outputs
+            * **Type**: `unsigned int`
+            * **Default value**: None
+            * **Required**: *yes*
+
+        * *internal_layer_id*
+
+            * **Description**: *internal_layer_id* is a `Parameter` or `Result` operation ID inside 
+            the subgraph to map to.
+            * **Range of values**: IDs of the `Parameter` or `Result` operations in the subgraph
+            * **Type**: `unsigned int`
+            * **Default value**: None
+            * **Required**: *yes* 
+
+**If Inputs**
+
+
+* **cond**: A scalar or 1D tensor with 1 element of `boolean` type specifying which subgraph to execute.
+A `True` value means that `then_body` is executed, a `False` value means that `else_body` is executed. *Required*.
+
+* **Multiple other inputs**: Tensors of different types and shapes. *Optional*.
+
+**If Outputs**
+
+* **Multiple outputs**: Results of execution of one of the subgraphs. Tensors of any type and shape.
+
+
+**Body Inputs**
+
+* **Multiple inputs**: Tensors of different types and shapes. *Optional*.
+
+
+**Body Outputs**
+
+* **Multiple outputs**: Results of execution of the subgraph. Tensors of any type and shape.
+
+
+**Examples**
+
+*Example 1: a typical If structure*
+```xml
+    <layer id="6" name="if/cond" type="If" version="opset8">
+        <input>
+            <port id="0"/>
+            <port id="1">
+                <dim>2</dim>
+                <dim>4</dim>
+            </port>
+            <port id="2">
+                <dim>2</dim>
+                <dim>4</dim>
+            </port>
+            <port id="3">
+                <dim>2</dim>
+                <dim>4</dim>
+            </port>
+        </input>
+        <output>
+            <port id="4" names="if/cond/Identity:0,if/cond:0" precision="FP32">
+                <dim>2</dim>
+                <dim>4</dim>
+            </port>
+        </output>
+        <then_port_map>
+            <input external_port_id="1" internal_layer_id="0"/>
+            <input external_port_id="2" internal_layer_id="1"/>
+            <output external_port_id="0" internal_layer_id="3"/>
+        </then_port_map>
+        <else_port_map>
+            <input external_port_id="1" internal_layer_id="0"/>
+            <input external_port_id="3" internal_layer_id="1"/>
+            <output external_port_id="0" internal_layer_id="3"/>
+        </else_port_map>
+        <then_body>
+            <layers>
+                <layer id="0" name="add_x" type="Parameter" version="opset1">
+                    <data element_type="f32" shape="2,4"/>
+                    <output>
+                        <port id="0" names="add_x:0" precision="FP32">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </output>
+                </layer>
+                <layer id="1" name="add_z" type="Parameter" version="opset1">
+                    <data element_type="f32" shape="2,4"/>
+                    <output>
+                        <port id="0" names="add_z:0" precision="FP32">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </output>
+                </layer>
+                <layer id="2" name="Add" type="Add" version="opset1">
+                    <data auto_broadcast="numpy"/>
+                    <input>
+                        <port id="0">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                        <port id="1">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </input>
+                    <output>
+                        <port id="2" names="Add:0" precision="FP32">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </output>
+                </layer>
+                <layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
+                    <input>
+                        <port id="0">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </input>
+                </layer>
+            </layers>
+            <edges>
+                <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
+                <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
+                <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
+            </edges>
+        </then_body>
+        <else_body>
+            <layers>
+                <layer id="0" name="add_x" type="Parameter" version="opset1">
+                    <data element_type="f32" shape="2,4"/>
+                    <output>
+                        <port id="0" names="add_x:0" precision="FP32">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </output>
+                </layer>
+                <layer id="1" name="add_w" type="Parameter" version="opset1">
+                    <data element_type="f32" shape="2,4"/>
+                    <output>
+                        <port id="0" names="add_w:0" precision="FP32">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </output>
+                </layer>
+                <layer id="2" name="Add" type="Add" version="opset1">
+                    <data auto_broadcast="numpy"/>
+                    <input>
+                        <port id="0">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                        <port id="1">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </input>
+                    <output>
+                        <port id="2" names="Add:0" precision="FP32">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </output>
+                </layer>
+                <layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
+                    <input>
+                        <port id="0">
+                            <dim>2</dim>
+                            <dim>4</dim>
+                        </port>
+                    </input>
+                </layer>
+            </layers>
+            <edges>
+                <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
+                <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
+                <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
+            </edges>
+        </else_body>
+    </layer>
+```
diff --git a/docs/ops/condition/Select_1.md b/docs/ops/condition/Select_1.md
index 8f51624961078e..56e5fde8eab790 100644
--- a/docs/ops/condition/Select_1.md
+++ b/docs/ops/condition/Select_1.md
@@ -17,26 +17,31 @@
 
   * **Description**: specifies rules used for auto-broadcasting of input tensors.
   * **Range of values**:
-    * *none* - no auto-broadcasting is allowed, all input shapes should match
-    * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
-  * **Type**: string
+    * *none* - no auto-broadcasting is allowed, all input shapes must match
+    * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md)
+    * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md)
+  * **Type**: `string`
   * **Default value**: "numpy"
   * **Required**: *no*
 
 
 **Inputs**:
 
-* **1**: `cond` tensor with selection mask of type `boolean`. The tensor can be 0D.
+* **1**: `cond` - tensor of type *T_COND* and arbitrary shape with selection mask. **Required**.
 
-* **2**: `then` the tensor with elements to take where the corresponding element in `cond` is true. Arbitrary type that should match type of `else` input tensor.
+* **2**: `then` - tensor of type *T* and arbitrary shape with elements to take where the corresponding element in `cond` is `true`. **Required**.
 
-* **3**: `else` the tensor with elements to take where the corresponding element in `cond` is false. Arbitrary type that should match type of `then` input tensor.
+* **3**: `else` - tensor of type *T* and arbitrary shape with elements to take where the corresponding element in `cond` is `false`. **Required**.
 
 
 **Outputs**:
 
 * **1**: blended output tensor that is tailored from values of inputs tensors `then` and `else` based on `cond` and broadcasting rules. It has the same type of elements as `then` and `else`.
 
+**Types**
+
+* *T_COND*: `boolean` type.
+* *T*: any supported numeric type.
 
 **Example**
 
diff --git a/docs/ops/opset8.md b/docs/ops/opset8.md
index 02e97eab4e42f6..42d815e7c854ef 100644
--- a/docs/ops/opset8.md
+++ b/docs/ops/opset8.md
@@ -79,6 +79,7 @@ declared in `namespace opset8`.
 * [HSigmoid](activation/HSigmoid_5.md)
 * [HSwish](activation/HSwish_4.md)
 * [IDFT](signals/IDFT_7.md)
+* [If](condition/If_8.md)
 * [Interpolate](image/Interpolate_4.md)
 * [Less](comparison/Less_1.md)
 * [LessEqual](comparison/LessEqual_1.md)
diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index ffbfc0a1a14160..bf3acd4d466475 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -120,11 +120,12 @@ ie_faster_build(${TARGET_NAME}_obj
 )
 
 target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API
-                                                      $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_COMPILE_DEFINITIONS>)
+                                                      $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_COMPILE_DEFINITIONS>
+                                                      $<TARGET_PROPERTY:ngraph::frontend_manager::static,INTERFACE_COMPILE_DEFINITIONS>)
 
 target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES>
                                                              $<TARGET_PROPERTY:pugixml::static,INTERFACE_INCLUDE_DIRECTORIES>
-                                                             $<TARGET_PROPERTY:ngraph::frontend_manager,INTERFACE_INCLUDE_DIRECTORIES>
+                                                             $<TARGET_PROPERTY:ngraph::frontend_manager::static,INTERFACE_INCLUDE_DIRECTORIES>
                                                              $<TARGET_PROPERTY:xbyak,INTERFACE_INCLUDE_DIRECTORIES>)
 
 target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}"
@@ -161,7 +162,7 @@ if (TBBBIND_2_4_FOUND)
 endif()
 
 target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static openvino::itt ${CMAKE_DL_LIBS} Threads::Threads
-                                             ngraph ngraph::frontend_manager inference_engine_transformations)
+                                             ngraph ngraph::frontend_manager::static inference_engine_transformations)
 
 target_include_directories(${TARGET_NAME} INTERFACE
             $<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>
@@ -201,7 +202,7 @@ if(WIN32)
     set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s)
 endif()
 
-target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ngraph ngraph::frontend_manager
+target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ngraph ngraph::frontend_manager::static
                                                inference_engine_transformations pugixml::static)
 
 target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index 49b89ee339a89f..be9bfa0d1cbc20 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -26,6 +26,7 @@
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
 #include <transformations/common_optimizations/depth_to_space_fusion.hpp>
 #include <transformations/common_optimizations/softmax_fusion.hpp>
+#include <transformations/common_optimizations/normalize_l2_fusion.hpp>
 #include <transformations/op_conversions/convert_depth_to_space.hpp>
 #include <transformations/op_conversions/convert_shuffle_channels3.hpp>
 #include <transformations/op_conversions/convert_space_to_depth.hpp>
@@ -87,6 +88,7 @@
 
 #include "nodes/mkldnn_mvn_node.h"
 #include "nodes/mkldnn_fake_quantize_node.h"
+#include "nodes/mkldnn_normalize_node.h"
 #include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
 
 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
@@ -277,6 +279,13 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
                 return node->input_value(0).get_partial_shape().rank().get_length() > 5;
             });
 
+    auto normalizeL2FusionCallback = [](const_node_ptr &node) -> bool {
+        std::string errorMsg;
+        return !MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
+    };
+    pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithAdd>(normalizeL2FusionCallback);
+    pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithMax>(normalizeL2FusionCallback);
+
     // List of enabled/disabled transformations
     pass_config->disable<ngraph::pass::ConvertGELU>();
     pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
index ff95f416573a25..2da3ae8f330064 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
@@ -660,7 +660,7 @@ MKLDNNNormalizeL2Node::MKLDNNNormalizeL2Node(const std::shared_ptr<ngraph::Node>
     }
 }
 
-bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         const auto norm = std::dynamic_pointer_cast<const ngraph::op::v0::NormalizeL2>(op);
         if (!norm) {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h
index bcb7b0d8d491f2..6b6a62bf42c418 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h
@@ -84,7 +84,7 @@ class MKLDNNNormalizeL2Node : public MKLDNNNode {
         return false;
     }
 
-    static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
     bool canFuse(const MKLDNNNodePtr& node) const override;
 
 private:
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp
index 905356b4d5fd7a..22aac2e1c71d33 100644
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp
@@ -25,10 +25,10 @@ ngraph::pass::NormalizeL2FusionWithMax::NormalizeL2FusionWithMax() {
     auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
     auto axes = ngraph::pattern::wrap_type<ngraph::opset4::Constant>();
     auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes);
-    auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
     auto eps_const = ngraph::pattern::wrap_type<ngraph::opset4::Constant>();
-    auto sqrt_max_eps = std::make_shared<ngraph::opset4::Maximum>(sqrt, eps_const);
-    auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_max_eps);
+    auto max = std::make_shared<ngraph::opset4::Maximum>(reduce_sum, eps_const);
+    auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(max);
+    auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
     ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) {
         auto& pattern_to_output = m.get_pattern_value_map();
@@ -52,12 +52,14 @@ ngraph::pass::NormalizeL2FusionWithMax::NormalizeL2FusionWithMax() {
         const auto eps_attr_value = eps_attr->cast_vector<float>()[0];
 
         auto normalize_l2 = std::make_shared<ngraph::opset4::NormalizeL2>(data_input, axes_input, eps_attr_value, op::EpsMode::MAX);
+        if (transformation_callback(normalize_l2))
+            return false;
 
         normalize_l2->set_friendly_name(m.get_match_root()->get_friendly_name());
         ngraph::copy_runtime_info({pattern_to_output.at(pow).get_node_shared_ptr(),
                                    pattern_to_output.at(reduce_sum).get_node_shared_ptr(),
                                    pattern_to_output.at(sqrt).get_node_shared_ptr(),
-                                   pattern_to_output.at(sqrt_max_eps).get_node_shared_ptr(),
+                                   pattern_to_output.at(max).get_node_shared_ptr(),
                                    pattern_to_output.at(divide).get_node_shared_ptr()
                                    },
                                    normalize_l2);
@@ -79,10 +81,10 @@ ngraph::pass::NormalizeL2FusionWithAdd::NormalizeL2FusionWithAdd() {
     auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
     auto axes = ngraph::pattern::wrap_type<ngraph::opset4::Constant>();
     auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes);
-    auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
     auto eps_const = ngraph::pattern::wrap_type<ngraph::opset4::Constant>();
-    auto sqrt_add_eps = std::make_shared<ngraph::opset4::Add>(sqrt, eps_const);
-    auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_add_eps);
+    auto add = std::make_shared<ngraph::opset4::Add>(reduce_sum, eps_const);
+    auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(add);
+    auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
     ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
         auto& pattern_to_output = m.get_pattern_value_map();
@@ -106,12 +108,14 @@ ngraph::pass::NormalizeL2FusionWithAdd::NormalizeL2FusionWithAdd() {
         const auto eps_attr_value = op::util::has_constant_value<float>(exp_input, 2.0f);
 
         auto normalize_l2 = std::make_shared<ngraph::opset4::NormalizeL2>(data_input, axes_input, eps_attr_value, op::EpsMode::ADD);
+        if (transformation_callback(normalize_l2))
+            return false;
 
         normalize_l2->set_friendly_name(m.get_match_root()->get_friendly_name());
         ngraph::copy_runtime_info({pattern_to_output.at(pow).get_node_shared_ptr(),
                                    pattern_to_output.at(reduce_sum).get_node_shared_ptr(),
                                    pattern_to_output.at(sqrt).get_node_shared_ptr(),
-                                   pattern_to_output.at(sqrt_add_eps).get_node_shared_ptr(),
+                                   pattern_to_output.at(add).get_node_shared_ptr(),
                                    pattern_to_output.at(divide).get_node_shared_ptr()
                                    },
                                    normalize_l2);
diff --git a/inference-engine/tests/functional/inference_engine/transformations/normalize_l2_fusion_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/normalize_l2_fusion_test.cpp
index 822ff62cfc173a..f1d496013a407c 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/normalize_l2_fusion_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/normalize_l2_fusion_test.cpp
@@ -27,10 +27,10 @@ TEST(TransformationTests, NormalizeL2FusionWithMax) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {eps_value});
-        auto sqrt_max_eps = std::make_shared<ngraph::opset4::Maximum>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_max_eps);
+        auto max = std::make_shared<ngraph::opset4::Maximum>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(max);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
 
@@ -62,10 +62,10 @@ TEST(TransformationTests, NormalizeL2FusionWithMaxIncorrectExp) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {eps_value});
-        auto sqrt_max_eps = std::make_shared<ngraph::opset4::Maximum>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_max_eps);
+        auto max = std::make_shared<ngraph::opset4::Maximum>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(max);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
 
@@ -81,10 +81,10 @@ TEST(TransformationTests, NormalizeL2FusionWithMaxIncorrectExp) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {eps_value});
-        auto sqrt_max_eps = std::make_shared<ngraph::opset4::Maximum>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_max_eps);
+        auto max = std::make_shared<ngraph::opset4::Maximum>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(max);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
     }
@@ -101,10 +101,10 @@ TEST(TransformationTests, NormalizeL2FusionWithMaxIncorrectEpsValueShape) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{2}, {1, 2});
-        auto sqrt_max_eps = std::make_shared<ngraph::opset4::Maximum>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_max_eps);
+        auto max = std::make_shared<ngraph::opset4::Maximum>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(max);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
 
@@ -120,10 +120,10 @@ TEST(TransformationTests, NormalizeL2FusionWithMaxIncorrectEpsValueShape) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{2}, {1, 2});
-        auto sqrt_max_eps = std::make_shared<ngraph::opset4::Maximum>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_max_eps);
+        auto max = std::make_shared<ngraph::opset4::Maximum>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(max);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
     }
@@ -141,10 +141,10 @@ TEST(TransformationTests, NormalizeL2FusionWithAdd) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {eps_value});
-        auto sqrt_add_eps = std::make_shared<ngraph::opset4::Add>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_add_eps);
+        auto add = std::make_shared<ngraph::opset4::Add>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(add);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
 
@@ -176,10 +176,10 @@ TEST(TransformationTests, NormalizeL2FusionWithAddIncorrectExp) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {eps_value});
-        auto sqrt_add_eps = std::make_shared<ngraph::opset4::Add>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_add_eps);
+        auto add = std::make_shared<ngraph::opset4::Add>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(add);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
 
@@ -196,10 +196,10 @@ TEST(TransformationTests, NormalizeL2FusionWithAddIncorrectExp) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {eps_value});
-        auto sqrt_add_eps = std::make_shared<ngraph::opset4::Add>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_add_eps);
+        auto add = std::make_shared<ngraph::opset4::Add>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(add);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
     }
@@ -216,10 +216,10 @@ TEST(TransformationTests, NormalizeL2FusionWithAddIncorrectEpsValueShape) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{2}, {1, 2});
-        auto sqrt_add_eps = std::make_shared<ngraph::opset4::Add>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_add_eps);
+        auto add = std::make_shared<ngraph::opset4::Add>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(add);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
 
@@ -235,10 +235,10 @@ TEST(TransformationTests, NormalizeL2FusionWithAddIncorrectEpsValueShape) {
         auto pow = std::make_shared<ngraph::opset4::Power>(input, exp);
         auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
         auto reduce_sum = std::make_shared<ngraph::opset4::ReduceSum>(pow, axes_const);
-        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(reduce_sum);
         auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{2}, {1, 2});
-        auto sqrt_add_eps = std::make_shared<ngraph::opset4::Add>(sqrt, eps_const);
-        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt_add_eps);
+        auto add = std::make_shared<ngraph::opset4::Add>(reduce_sum, eps_const);
+        auto sqrt = std::make_shared<ngraph::opset4::Sqrt>(add);
+        auto divide = std::make_shared<ngraph::opset4::Divide>(input, sqrt);
 
         f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{divide}, ngraph::ParameterVector{input});
     }
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp
index 437d8737d65bf7..9e9e7796295109 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp
@@ -22,8 +22,8 @@ const std::vector<size_t> groups = {1};
 const std::vector<size_t> defor_groups = {2};
 const std::vector<size_t> numOutChannels = {1, 5};
 const std::vector<size_t> multiple_defor_groups = {4};
-const std::vector<std::vector<size_t>> deform_vals = {{1, 200, 220, 220}};
-const std::vector<std::vector<size_t>> kernel = {{64, 16, 5, 5}};
+const std::vector<std::vector<size_t>> deform_vals = {{1, 72, 64, 64}};
+const std::vector<std::vector<size_t>> kernel = {{16, 16, 3, 3}};
 
 const std::vector<bool> with_bilinear_interpolation_pad = { false, true };
 const std::vector<bool> with_modulated_scalar = { false, true };
@@ -92,7 +92,7 @@ INSTANTIATE_TEST_SUITE_P(
         ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
         ::testing::Values(InferenceEngine::Layout::ANY),
         ::testing::Values(InferenceEngine::Layout::ANY),
-        ::testing::Values(std::vector<size_t>({1, 16, 224, 224})),
+        ::testing::Values(std::vector<size_t>({1, 16, 66, 66})),
         ::testing::Values(CommonTestUtils::DEVICE_CPU)),
     DeformableConvolutionLayerTest::getTestCaseName);
 
diff --git a/model-optimizer/extensions/back/ReverseInputChannels.py b/model-optimizer/extensions/back/ReverseInputChannels.py
index e8dd267d460422..987347ef6c1b11 100644
--- a/model-optimizer/extensions/back/ReverseInputChannels.py
+++ b/model-optimizer/extensions/back/ReverseInputChannels.py
@@ -94,8 +94,38 @@ class ReverseChannelsPropagationDown(BackReplacementPattern):
 
         'Shape': lambda node, rc: ReverseChannelsPropagationDown.pass_rc_through_shape(node, rc),
         'ShapeOf': lambda node, rc: ReverseChannelsPropagationDown.pass_rc_through_shape(node, rc),
+
+        'Pad': lambda node, rc: ReverseChannelsPropagationDown.pass_rc_through(node, rc),
     }
 
+    @staticmethod
+    def pass_rc_through(node: Node, reverse_channels: Node):
+        r"""
+        BEFORE                          AFTER
+
+          previous_op
+              |
+        ReverseChannels  previous_op     previous_op  previous_op
+                     \     /                      \     /
+                       Node                         Node
+                                                     |
+                                              ReverseChannels
+
+        returns boolean value whether we should continue propagating current ReverseChannels operation down or not
+        """
+        # detaching reverse_channels node from the graph
+        if reverse_channels.is_in_port_connected(0) and reverse_channels.is_out_port_connected(0)\
+                and node.is_out_port_connected(0):
+            reverse_channels.out_port(0).get_connection().set_source(
+                reverse_channels.in_port(0).get_connection().get_source())
+            reverse_channels.in_port(0).disconnect()
+
+            node.out_port(0).get_connection().set_source(reverse_channels.out_port(0))
+            node.out_port(0).disconnect()
+            node.out_port(0).connect(reverse_channels.in_port(0))
+            return True
+        return False
+
     @staticmethod
     def pass_rc_through_conv(node, reverse_channels):
         r"""
@@ -265,8 +295,39 @@ class ReverseChannelsPropagationUp(BackReplacementPattern):
         'Subtract': lambda node, rc: ReverseChannelsPropagationUp.lift_up_through_eltwise(node, rc),
         'Pow': lambda node, rc: ReverseChannelsPropagationUp.lift_up_through_eltwise(node, rc),
         'Convert': lambda node, rc: ReverseChannelsPropagationUp.lift_up_through_eltwise(node, rc),
+
+        'Pad': lambda node, rc: ReverseChannelsPropagationUp.lift_up_through(node, rc),
     }
 
+    @staticmethod
+    def lift_up_through(node: Node, reverse_channels: Node):
+        r"""
+        BEFORE                       AFTER
+
+                                     previous_op
+                                          \
+        previous_op  previous_op       ReverseChannels  previous_op
+                 \     /                           \     /
+                   Node                             Node
+                    |                                |
+              ReverseChannels                      next_op
+                    |
+                 next_op
+
+        returns boolean value whether we should continue propagating current ReverseChannels operation up or not
+        """
+        if node.is_in_port_connected(0):
+            node_input_port_0 = node.in_port(0)
+            reverse_channels_out_npde = reverse_channels.out_port(0).get_connection().get_destination().node
+            reverse_channels.out_port(0).disconnect()
+
+            src = node_input_port_0.get_connection().get_source()
+            node_input_port_0.get_connection().set_source(reverse_channels.out_port(0))
+            src.connect(reverse_channels.in_port(0))
+            node.out_port(0).get_connection().set_destination(reverse_channels_out_npde.in_port(0))
+            return True
+        return False
+
     @staticmethod
     def lift_up_through_eltwise(node: Node, reverse_channels: Node):
         r"""
diff --git a/model-optimizer/extensions/back/compress_quantized_weights.py b/model-optimizer/extensions/back/compress_quantized_weights.py
index 62799acc1d1c39..98fbd57f4fd7b2 100644
--- a/model-optimizer/extensions/back/compress_quantized_weights.py
+++ b/model-optimizer/extensions/back/compress_quantized_weights.py
@@ -6,7 +6,8 @@
 import numpy as np
 
 from extensions.ops.Cast import Cast
-from extensions.ops.elementwise import Sub, Div, Mul, Negative
+from extensions.ops.elementwise import Sub, Div, Mul, Negative, Equal
+from extensions.ops.select import Select
 from mo.back.replacement import BackReplacementPattern
 from mo.graph.graph import Graph, Node
 from mo.middle.passes.convert_data_type import data_type_str_to_np, np_data_type_to_destination_type, packed_I4
@@ -70,15 +71,7 @@ class CompressQuantizeWeights(BackReplacementPattern):
             scale = (output_high - output_low) / (input_high - input_low)
                 WARNING: division by zero imposes restriction -- input_high can not be equal to input_low
             zero_point = input_low - output_low / scale
-
-    TODO: steps 5 and 6 are NOT IMPLEMENTED YET
-    TODO: DOES LPT NEED IT???
-    Step 5: Having zero_point == 0 is really beneficial for performance, so we try to fuse Subtract up to the Constant.
-        It is not always possible because of the quantized_dtype possible range of values.
-
-    Step 6: (Optional) From the nature of Subtract and Multiply operations they may be optimized out in cases:
-            zero_point == 0
-            scale == 1
+            NOTE: if scale == 0 then zero_point is equal to zero too (achieved through Select operation)
 
     BENEFITS:
         Such constant data packing reduces IR size (.bin file size)
@@ -186,14 +179,24 @@ def dequantize_data(fake_quantize: Node, dst_type: type, quantized_type: type) -
         descaled_output_low.in_port(0).connect(out_low)
         descaled_output_low.in_port(1).connect(scale.out_port(0))
 
-        shift = Sub(graph, {'name': name + '/zero_point'}).create_node()
+        shift = Sub(graph, {'name': name + '/shift'}).create_node()
         shift.in_port(0).connect(in_low)
         shift.in_port(1).connect(descaled_output_low.out_port(0))
 
+        zero = Const(graph, {'name': name + '/zero', 'value': np.array(0, dtype=dst_type)}).create_node()
+        scale_eq_zero = Equal(graph, {'name': name + '/scale_eq_zero'}).create_node()
+        scale_eq_zero.in_port(0).connect(scale.out_port(0))
+        scale_eq_zero.in_port(1).connect(zero.out_port(0))
+
+        zero_point = Select(graph, {'name': name + '/zero_point'}).create_node()
+        zero_point.in_port(0).connect(scale_eq_zero.out_port(0))
+        zero_point.in_port(1).connect(zero.out_port(0))
+        zero_point.in_port(2).connect(shift.out_port(0))
+
         # DeQuantize(x) == Mul(Sub(x, zero_point), scale)
         sub_zp = Sub(graph, {'name': name + '/minus_zp'}).create_node()
         sub_zp.in_port(0).connect(dequantizing_cast.out_port(0))
-        sub_zp.in_port(1).connect(shift.out_port(0))
+        sub_zp.in_port(1).connect(zero_point.out_port(0))
 
         mul_scale = Mul(graph, {'name': name + '/mulpiply_by_scale'}).create_node()
         mul_scale.in_port(0).connect(sub_zp.out_port(0))
@@ -221,6 +224,12 @@ def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
 
 
 class ZeroPointOptimizer(BackReplacementPattern):
+    r"""
+    Step 1: Having zero_point == 0 is really beneficial for performance, so we try to fuse Subtract up to the Constant.
+        It is not always possible because of the quantized_dtype possible range of values.
+
+    Step 2: From the nature of Subtract operation it may be optimized out if zero_point == 0
+    """
     enabled = True
     force_clean_up = True
 
@@ -249,16 +258,18 @@ def pattern(self):
         )
 
     def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
+        zero_point = match['const_zp'].out_port(0).data.get_value()
+        assert zero_point is not None
+        convert = match['convert']
         sub = match['sub']
-        zero_point = sub.in_port(1).data.get_value()
-        if zero_point is None or np.allclose(zero_point, 0):
+        if np.allclose(zero_point, 0):
+            sub.out_port(0).get_connection().set_source(convert.out_port(0))
             return
 
-        convert = match['convert']
-        dst_type = convert.dst_type
-        weights = convert.in_port(0).data.get_value()
+        weights = match['const'].out_port(0).data.get_value()
         if weights is None or weights.dtype != np.int8:
             return
+        dst_type = convert.dst_type
 
         int8_zero_point = np.round(zero_point).astype(np.int8)
         adj_zero_point = (zero_point - int8_zero_point).astype(dst_type)
@@ -266,8 +277,8 @@ def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
         original = weights.astype(dst_type) - zero_point
         transformed = (weights - int8_zero_point).astype(np.int8) - adj_zero_point
 
-        if not np.allclose(original, transformed) or not np.allclose(adj_zero_point, 0):
+        if not np.allclose(original, transformed) or not np.allclose(adj_zero_point, 0, atol=1.e-04):
             return
 
         match['const_d']['value'] = (weights - int8_zero_point).astype(np.int8)
-        match['const_zp_d']['value'] = np.zeros(adj_zero_point.shape, dst_type)
+        sub.out_port(0).get_connection().set_source(convert.out_port(0))
diff --git a/model-optimizer/extensions/front/tf/pad_tf_to_pad.py b/model-optimizer/extensions/front/tf/pad_tf_to_pad.py
index 42a8de27cf2840..fb423dab124ba7 100644
--- a/model-optimizer/extensions/front/tf/pad_tf_to_pad.py
+++ b/model-optimizer/extensions/front/tf/pad_tf_to_pad.py
@@ -34,12 +34,6 @@ def find_and_replace_pattern(self, graph: Graph):
                 # the input with fill value is an optional third input in TF
                 if not tfpad.in_port(2).disconnected():
                     tfpad.in_port(2).get_connection().set_destination(new_pad.in_port(3))
-                else:
-                    # create Constant node of proper data type (equal to the data type of the Pad first input)
-                    convert_pad_value = create_op_with_const_inputs(graph, ConvertLike, {0: 0.0},
-                                                                    {'name': original_name + '/pad_value_convert'})
-                    convert_pad_value.in_port(1).connect(new_pad.in_port(0).get_source())
-                    new_pad.in_port(3).connect(convert_pad_value.out_port(0))
 
             # convert TF representation of the pads as [N, 2] to MO representation: [N] and [N]
             transposed_pads = create_op_with_const_inputs(graph, Transpose, {1: int64_array([1, 0])})
diff --git a/model-optimizer/mo/utils/check_ie_bindings.py b/model-optimizer/mo/utils/check_ie_bindings.py
index dae7aeb9958b12..fd4732593c1601 100644
--- a/model-optimizer/mo/utils/check_ie_bindings.py
+++ b/model-optimizer/mo/utils/check_ie_bindings.py
@@ -58,6 +58,7 @@ def import_core_modules(silent: bool, path_to_module: str):
 
         import openvino  # pylint: disable=import-error,no-name-in-module
         import ngraph  # pylint: disable=import-error,no-name-in-module
+        import ngraph.frontend  # pylint: disable=import-error,no-name-in-module
 
         if silent:
             return True
diff --git a/model-optimizer/mo/utils/find_ie_version.py b/model-optimizer/mo/utils/find_ie_version.py
index 9251b713ac3cab..59fada431fe58a 100644
--- a/model-optimizer/mo/utils/find_ie_version.py
+++ b/model-optimizer/mo/utils/find_ie_version.py
@@ -26,10 +26,10 @@ def setup_env(module="", libs=[]):
     :param module: path to python module
     :param libs: list with paths to libraries
     """
-    os.environ[python_path_key] = os.pathsep.join([os.environ[python_path_key], module])
-    os.environ[lib_env_key] = os.pathsep.join([os.environ[lib_env_key], *libs])
+    os.environ[python_path_key] = os.pathsep.join([module, os.environ[python_path_key]])
+    os.environ[lib_env_key] = os.pathsep.join([*libs, os.environ[lib_env_key]])
     if not os.getenv("OV_FRONTEND_PATH"):
-        os.environ["OV_FRONTEND_PATH"] = os.pathsep.join([os.environ[lib_env_key], *libs])
+        os.environ["OV_FRONTEND_PATH"] = os.pathsep.join([*libs, os.environ[lib_env_key]])
 
 
 def reset_env():
diff --git a/model-optimizer/unit_tests/extensions/back/ReverseInputChannels_test.py b/model-optimizer/unit_tests/extensions/back/ReverseInputChannels_test.py
index 057f84010e3247..8ac90c8708fdaf 100644
--- a/model-optimizer/unit_tests/extensions/back/ReverseInputChannels_test.py
+++ b/model-optimizer/unit_tests/extensions/back/ReverseInputChannels_test.py
@@ -3,9 +3,10 @@
 
 import unittest
 
-from extensions.back.ReverseInputChannels import ReverseChannelsPropagationUp
+from extensions.back.ReverseInputChannels import ReverseChannelsPropagationUp, ReverseChannelsPropagationDown
 from mo.graph.graph import Node, Graph
-from unit_tests.utils.graph import build_graph, result, connect, regular_op_with_shaped_data
+from unit_tests.utils.graph import build_graph, result, connect, regular_op_with_shaped_data, valued_const_with_data
+from mo.front.common.partial_infer.utils import int64_array, float32_array
 
 nodes = {
     **regular_op_with_shaped_data('placeholder1', [1, 3, 10, 10], {'type': 'Parameter'}),
@@ -14,10 +15,25 @@
     **regular_op_with_shaped_data('mul', [1, 3, 10, 10], {'type': 'Multiply'}),
     **regular_op_with_shaped_data('reverse_channels', [1, 3, 10, 10], {'type': 'ReverseChannels', 'axis': 1}),
 
+
+    **regular_op_with_shaped_data('pad', [1, 3, 10, 10], {'type': 'Pad'}),
+
     **result('result'),
 }
 
 
+nodes2 = {
+    **regular_op_with_shaped_data('placeholder', [1, 3, 10, 10], {'type': 'Parameter'}),
+
+    **valued_const_with_data('mul_const', float32_array([-127.5, -127.5, -127.5])),
+    **regular_op_with_shaped_data('mul', [1, 3, 10, 10], {'type': 'Multiply'}),
+    **valued_const_with_data('pad_const_1', int64_array([0, 0, 0, 0])),
+    **valued_const_with_data('pad_const_2', int64_array([0, 0, 1, 1])),
+    **regular_op_with_shaped_data('pad', [1, 3, 10, 10], {'type': 'Pad'}),
+    **regular_op_with_shaped_data('reverse_channels', [1, 3, 10, 10], {'type': 'ReverseChannels', 'axis': 1}),
+    **result('result'),
+}
+
 class ReverseInputChannelsTest(unittest.TestCase):
     def check_graph_attrs(self, graph: Graph, parameter_node_names: list):
         for node in graph.get_op_nodes():
@@ -47,3 +63,30 @@ def test_lift_up_through_eltwise(self):
 
         ReverseChannelsPropagationUp.lift_up_through_eltwise(node, reverse_channels)
         self.check_graph_attrs(graph, ['placeholder1', 'placeholder2'])
+
+    def test_lift_up_through(self):
+        graph = build_graph(nodes2, [*connect('placeholder', '0:mul'), *connect('mul_const', '1:mul'),
+                                     *connect('mul', '0:pad'), *connect('pad_const_1', '1:pad'),
+                                     *connect('pad_const_2', '2:pad'), *connect('pad', 'reverse_channels'),
+                                     *connect('reverse_channels', 'result')])
+        self.set_graph_attrs(graph, ['placeholder'])
+
+        node = Node(graph, 'pad')
+        reverse_channels = Node(graph, 'reverse_channels')
+
+        ReverseChannelsPropagationUp.lift_up_through(node, reverse_channels)
+        self.check_graph_attrs(graph, ['placeholder'])
+
+
+    def test_pass_rc_through(self):
+        graph = build_graph(nodes2, [*connect('placeholder', '0:mul'), *connect('mul_const', '1:mul'),
+                                     *connect('mul', 'reverse_channels'),  *connect('reverse_channels', '0:pad'),
+                                     *connect('pad_const_1', '1:pad'), *connect('pad_const_2', '2:pad'),
+                                     *connect('pad', 'result')])
+        self.set_graph_attrs(graph, ['placeholder'])
+
+        node = Node(graph, 'pad')
+        reverse_channels = Node(graph, 'reverse_channels')
+
+        ReverseChannelsPropagationDown.pass_rc_through(node, reverse_channels)
+        self.check_graph_attrs(graph, ['placeholder'])
diff --git a/model-optimizer/unit_tests/extensions/back/compress_quantized_weights_test.py b/model-optimizer/unit_tests/extensions/back/compress_quantized_weights_test.py
index 5e4aa87b525883..45d977beb55da0 100644
--- a/model-optimizer/unit_tests/extensions/back/compress_quantized_weights_test.py
+++ b/model-optimizer/unit_tests/extensions/back/compress_quantized_weights_test.py
@@ -254,10 +254,42 @@ class ZeroPointOptimizerTestClass(unittest.TestCase):
     @generate(*[
         ([-10, 7], [-1], [-9, 8], [0]),
         ([-10, 7], [-0.99999999], [-9, 8], [0]),
+    ])
+    def test_zero_point_optimization(self, weights, zero_point, adj_weights, adj_zero_point):
+        nodes = lambda w, zp: {
+            **valued_const_with_data('weights', np.array(w, dtype=np.int8)),
+            **regular_op_with_shaped_data(
+                'cast', len(w), {'type': 'Convert', 'op': 'Cast', 'infer': Cast.infer, 'dst_type': np.float32}),
+            **valued_const_with_data('zp', np.array(zp, dtype=np.float32)),
+            **regular_op_with_shaped_data(
+                'sub', len(w),
+                {'type': 'Subtract', 'op': 'Sub', 'infer': lambda node: eltwise_infer(node, Sub.operation)}),
+            **result()
+        }
+        edges = [
+            *connect("weights:0", "0:cast"),
+            *connect("cast:0", "0:sub"),
+            *connect("zp:0", "1:sub"),
+            *connect("sub:0", "0:output"),
+        ]
+        graph = build_graph(nodes(weights, zero_point), edges, nodes_with_edges_only=True)
+        ZeroPointOptimizer().find_and_replace_pattern(graph)
+        graph.clean_up()
+
+        graph_ref = build_graph(nodes(adj_weights, adj_zero_point), [
+            *connect("weights:0", "0:cast"),
+            *connect("cast:0", "0:output"),
+        ], nodes_with_edges_only=True)
+        graph_ref.clean_up()
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    @generate(*[
         ([-128, 7], [1], [-128, 7], [1]),
         ([127, 7], [-1], [127, 7], [-1]),
     ])
-    def test_zero_point_optimization(self, weights, zero_point, adj_weights, adj_zero_point):
+    def test_negative_zero_point_optimization(self, weights, zero_point, adj_weights, adj_zero_point):
         nodes = lambda w, zp: {
             **valued_const_with_data('weights', np.array(w, dtype=np.int8)),
             **regular_op_with_shaped_data(
diff --git a/model-optimizer/unit_tests/extensions/front/tf/pad_tf_to_pad_test.py b/model-optimizer/unit_tests/extensions/front/tf/pad_tf_to_pad_test.py
index a8201d44917b76..e9e47cd2ac0d15 100644
--- a/model-optimizer/unit_tests/extensions/front/tf/pad_tf_to_pad_test.py
+++ b/model-optimizer/unit_tests/extensions/front/tf/pad_tf_to_pad_test.py
@@ -74,9 +74,7 @@ def test_2_inputs(self):
                             {}, nodes_with_edges_only=True)
         graph.get_op_nodes(op='TFPad')[0].add_input_port(2)
 
-        graph_ref = build_graph(nodes_attributes, common_edges + [('pad_fill', 'convert_like', {'in': 0, 'out': 0}),
-                                                                  ('placeholder', 'convert_like', {'in': 1, 'out': 0}),
-                                                                  ('convert_like', 'pad', {'in': 3, 'out': 0})],
+        graph_ref = build_graph(nodes_attributes, common_edges,
                                 {}, nodes_with_edges_only=True)
         self._run_test(graph, graph_ref)
 
diff --git a/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/CMakeLists.txt b/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/CMakeLists.txt
index 1a94414055bf99..48bc37fa38c0fa 100644
--- a/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/CMakeLists.txt
+++ b/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/CMakeLists.txt
@@ -30,7 +30,7 @@ source_group("src" FILES ${PYBIND_FE_SRC})
 
 pybind11_add_module(${PYBIND_FE_NAME} MODULE ${PYBIND_FE_SRC})
 
-target_link_libraries(${PYBIND_FE_NAME} PRIVATE ngraph::ngraph ngraph::frontend_manager)
+target_link_libraries(${PYBIND_FE_NAME} PRIVATE ngraph::ngraph ngraph::frontend_manager::static)
 target_link_libraries(${PYBIND_FE_NAME} PRIVATE ${TARGET_FE_NAME})
 
 add_dependencies(${PYBIND_FE_NAME} ${TARGET_FE_NAME})
diff --git a/ngraph/core/builder/include/ngraph/builder/norm.hpp b/ngraph/core/builder/include/ngraph/builder/norm.hpp
index a4fd9e9e931de6..b687418159baa9 100644
--- a/ngraph/core/builder/include/ngraph/builder/norm.hpp
+++ b/ngraph/core/builder/include/ngraph/builder/norm.hpp
@@ -32,11 +32,13 @@ namespace ngraph
             ///
             /// \param[in]  value           The input tensor.
             /// \param[in]  reduction_axes  The axes along which we calculate norm.
+            /// \param[in]  keep_dims       The flag indicating whether the reduction axes are kept or removed.
             ///
             /// \return     L-0 norm of value. The output sub-graph is composed of v1 ops.
             ///
             std::shared_ptr<Node> l0_norm(const Output<Node>& value,
-                                          const Output<Node>& reduction_axes);
+                                          const Output<Node>& reduction_axes,
+                                          bool keep_dims = false);
 
             /// \brief      Calculates L-1 norm of a value.
             ///
@@ -45,12 +47,14 @@ namespace ngraph
             /// \param[in]  value           The input tensor.
             /// \param[in]  reduction_axes  The axes along which we calculate norm.
             /// \param[in]  bias            The bias added to the calculated sum.
+            /// \param[in]  keep_dims       The flag indicating whether the reduction axes are kept or removed.
             ///
             /// \return     L-1 norm of value. The output sub-graph is composed of v1 ops.
             ///
             std::shared_ptr<Node> l1_norm(const Output<Node>& value,
                                           const Output<Node>& reduction_axes,
-                                          float bias = 0.f);
+                                          float bias = 0.f,
+                                          bool keep_dims = false);
 
             /// \brief      Calculates L-2 norm of input tensor.
             ///
@@ -77,13 +81,15 @@ namespace ngraph
             /// \param[in]  reduction_axes  The axes along which we calculate norm.
             /// \param[in]  p_norm          The p norm to calculate.
             /// \param[in]  bias            The bias added to the calculated sum.
+            /// \param[in]  keep_dims       If true, keep the reduced axes; otherwise remove them.
             ///
             /// \return     L-p norm of value. The output sub-graph is composed of v1 ops.
             ///
             std::shared_ptr<Node> lp_norm(const Output<Node>& value,
                                           const Output<Node>& reduction_axes,
                                           std::size_t p_norm = 2,
-                                          float bias = 0.f);
+                                          float bias = 0.f,
+                                          bool keep_dims = false);
         } // namespace opset1
     }     // namespace builder
 } // namespace ngraph
diff --git a/ngraph/core/builder/src/builder/norm.cpp b/ngraph/core/builder/src/builder/norm.cpp
index 48ea9b5f2c53e5..db5437f082d42f 100644
--- a/ngraph/core/builder/src/builder/norm.cpp
+++ b/ngraph/core/builder/src/builder/norm.cpp
@@ -29,7 +29,8 @@ namespace ngraph
                 shared_ptr<Node> lp_norm(const Output<Node>& value,
                                          size_t p_norm,
                                          const Output<Node>& reduction_axes,
-                                         float bias)
+                                         float bias,
+                                         bool keep_dims)
                 {
                     // In general "entrywise" lp-norm for matrix `A` is defined as following double
                     // sum:
@@ -40,7 +41,8 @@ namespace ngraph
 
                     // Get inner part of equation: abs_values^p_node, then sum over reduction_axes.
                     shared_ptr<Node> values{make_shared<ngraph::opset1::Power>(abs_values, p_node)};
-                    values = make_shared<ngraph::opset1::ReduceSum>(values, reduction_axes, false);
+                    values =
+                        make_shared<ngraph::opset1::ReduceSum>(values, reduction_axes, keep_dims);
 
                     shared_ptr<Node> bias_node{ngraph::opset1::Constant::create(
                         values->get_element_type(), Shape{}, {bias})};
@@ -58,7 +60,8 @@ namespace ngraph
         }     // namespace detail
 
         shared_ptr<Node> builder::opset1::l0_norm(const Output<Node>& value,
-                                                  const Output<Node>& reduction_axes)
+                                                  const Output<Node>& reduction_axes,
+                                                  bool keep_dims)
         {
             // L0 norm returns number of elements different from zero.
             const shared_ptr<Node> zero_node{
@@ -68,16 +71,18 @@ namespace ngraph
             const shared_ptr<Node> non_zero_values = make_shared<ngraph::opset1::Convert>(
                 make_shared<ngraph::opset1::NotEqual>(value, zero_node), value.get_element_type());
 
-            return make_shared<ngraph::opset1::ReduceSum>(non_zero_values, reduction_axes, false)
+            return make_shared<ngraph::opset1::ReduceSum>(
+                       non_zero_values, reduction_axes, keep_dims)
                 ->add_provenance_group_members_above({value});
         }
 
         shared_ptr<Node> builder::opset1::l1_norm(const Output<Node>& value,
                                                   const Output<Node>& reduction_axes,
-                                                  float bias)
+                                                  float bias,
+                                                  bool keep_dims)
         {
             const shared_ptr<Node> values{make_shared<ngraph::opset1::ReduceSum>(
-                make_shared<ngraph::opset1::Abs>(value), reduction_axes, false)};
+                make_shared<ngraph::opset1::Abs>(value), reduction_axes, keep_dims)};
 
             const shared_ptr<Node> bias_node{
                 ngraph::opset1::Constant::create(values->get_element_type(), Shape{}, {bias})};
@@ -92,8 +97,10 @@ namespace ngraph
                                                   BiasMode bias_mode,
                                                   bool keep_dims)
         {
-            shared_ptr<Node> values{make_shared<ngraph::opset1::ReduceSum>(
-                make_shared<ngraph::opset1::Multiply>(value, value), reduction_axes, keep_dims)};
+            shared_ptr<Node> pow = make_shared<ngraph::opset1::Power>(
+                value, make_shared<ngraph::opset1::Constant>(value.get_element_type(), Shape{}, 2));
+            shared_ptr<Node> values{
+                make_shared<ngraph::opset1::ReduceSum>(pow, reduction_axes, keep_dims)};
 
             shared_ptr<Node> bias_node{
                 ngraph::opset1::Constant::create(values->get_element_type(), Shape{}, {bias})};
@@ -117,27 +124,28 @@ namespace ngraph
         shared_ptr<Node> builder::opset1::lp_norm(const Output<Node>& value,
                                                   const Output<Node>& reduction_axes,
                                                   size_t p_norm,
-                                                  float bias)
+                                                  float bias,
+                                                  bool keep_dims)
         {
             // The number of non-zero elements
             if (p_norm == 0)
             {
-                return opset1::l0_norm(value, reduction_axes);
+                return opset1::l0_norm(value, reduction_axes, keep_dims);
             }
             //  sum of absolute values.
             else if (p_norm == 1)
             {
-                return opset1::l1_norm(value, reduction_axes, bias);
+                return opset1::l1_norm(value, reduction_axes, bias, keep_dims);
             }
             // sqrt of sum of squares - Euclidean norm
             else if (p_norm == 2)
             {
-                return opset1::l2_norm(value, reduction_axes, bias);
+                return opset1::l2_norm(value, reduction_axes, bias, BiasMode::ADD, keep_dims);
             }
             // generic case
             else
             {
-                return detail::opset1::lp_norm(value, p_norm, reduction_axes, bias);
+                return detail::opset1::lp_norm(value, p_norm, reduction_axes, bias, keep_dims);
             }
         }
 
diff --git a/ngraph/frontend/frontend_manager/CMakeLists.txt b/ngraph/frontend/frontend_manager/CMakeLists.txt
index d36cf122554074..83069aa16752d0 100644
--- a/ngraph/frontend/frontend_manager/CMakeLists.txt
+++ b/ngraph/frontend/frontend_manager/CMakeLists.txt
@@ -14,11 +14,23 @@ source_group("src" FILES ${LIBRARY_SRC})
 source_group("include" FILES ${LIBRARY_HEADERS})
 source_group("public include" FILES ${LIBRARY_PUBLIC_HEADERS})
 
-# Create shared library
+# Static library
+
+add_library(${TARGET_NAME}_static STATIC ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${LIBRARY_PUBLIC_HEADERS})
+add_library(ngraph::${TARGET_NAME}::static ALIAS ${TARGET_NAME}_static)
+target_link_libraries(${TARGET_NAME}_static PRIVATE ${CMAKE_DL_LIBS} PUBLIC ngraph)
+target_include_directories(${TARGET_NAME}_static PUBLIC ${FRONTEND_INCLUDE_DIR})
+target_include_directories(${TARGET_NAME}_static PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
+target_compile_definitions(${TARGET_NAME}_static PUBLIC USE_STATIC_FRONTEND_MANAGER)
+
+
+# Shared library - need to recompile object files to export necessary symbols
 
 add_library(${TARGET_NAME} SHARED ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${LIBRARY_PUBLIC_HEADERS})
 add_library(ngraph::${TARGET_NAME} ALIAS ${TARGET_NAME})
-
+target_include_directories(${TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${FRONTEND_INCLUDE_DIR}>
+        $<INSTALL_INTERFACE:${FRONTEND_INSTALL_INCLUDE}>)
+target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
 target_link_libraries(${TARGET_NAME} PRIVATE ${CMAKE_DL_LIBS} PUBLIC ngraph)
 
 add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
@@ -28,12 +40,7 @@ if(COMMAND ie_add_vs_version_file)
                            FILEDESCRIPTION "Manager of OpenVINO nGraph Frontends")
 endif()
 
-target_include_directories(${TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${FRONTEND_INCLUDE_DIR}>
-                                                 $<INSTALL_INTERFACE:${FRONTEND_INSTALL_INCLUDE}>)
-
-target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
-
-# Installation rules
+# Installation rules for shared version only
 
 install(TARGETS ${TARGET_NAME} EXPORT ngraphTargets
         RUNTIME DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT ngraph
diff --git a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager_defs.hpp b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager_defs.hpp
index f7c1f3de86419d..e621c3db7b44d6 100644
--- a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager_defs.hpp
+++ b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager_defs.hpp
@@ -9,9 +9,13 @@
 // Increment each time when FrontEnd/InputModel/Place interface is changed
 #define OV_FRONTEND_API_VERSION 1
 
+#ifdef USE_STATIC_FRONTEND_MANAGER
+#define FRONTEND_API
+#else
 // Defined if cmake is building the frontend_manager DLL (instead of using it)
 #ifdef frontend_manager_EXPORTS
 #define FRONTEND_API NGRAPH_HELPER_DLL_EXPORT
 #else
 #define FRONTEND_API NGRAPH_HELPER_DLL_IMPORT
 #endif // frontend_manager_EXPORTS
+#endif // USE_STATIC_FRONTEND_MANAGER
\ No newline at end of file
diff --git a/ngraph/frontend/onnx/onnx_import/src/op/lp_norm.cpp b/ngraph/frontend/onnx/onnx_import/src/op/lp_norm.cpp
index b8efda67bfa0d5..3a19d68685be26 100644
--- a/ngraph/frontend/onnx/onnx_import/src/op/lp_norm.cpp
+++ b/ngraph/frontend/onnx/onnx_import/src/op/lp_norm.cpp
@@ -30,7 +30,6 @@ namespace ngraph
                     const auto data_shape = data.get_partial_shape();
                     const auto data_rank = data_shape.rank();
 
-                    const auto data_rank_value = data_rank.get_length();
                     const std::int64_t p_norm{node.get_attribute_value<std::int64_t>("p", 2)};
 
                     const std::int64_t axis{node.get_attribute_value<std::int64_t>("axis", -1)};
@@ -46,23 +45,7 @@ namespace ngraph
                     const auto normalize_axis_const =
                         default_opset::Constant::create(element::i64, {}, {normalize_axis});
                     std::shared_ptr<ngraph::Node> norm = ngraph::builder::opset1::lp_norm(
-                        data, normalize_axis_const, static_cast<std::size_t>(p_norm));
-
-                    const auto target_shape = std::make_shared<default_opset::ShapeOf>(data);
-
-                    // Create a default axes order matching the data tensor rank and erase the
-                    // element at the 'normalize_axis' position. The erased element indicates the
-                    // axis
-                    // along which the data should be broadcasted.
-                    std::vector<size_t> axes_values(data_rank_value);
-                    std::iota(axes_values.begin(), axes_values.end(), 0);
-                    axes_values.erase(axes_values.begin() + normalize_axis);
-
-                    const auto axes_mapping = default_opset::Constant::create(
-                        element::i64, Shape{axes_values.size()}, axes_values);
-
-                    norm = std::make_shared<default_opset::Broadcast>(
-                        norm, target_shape, axes_mapping);
+                        data, normalize_axis_const, static_cast<std::size_t>(p_norm), 0.0f, true);
 
                     return {std::make_shared<default_opset::Divide>(data, norm)};
                 }
diff --git a/ngraph/frontend/paddlepaddle/CMakeLists.txt b/ngraph/frontend/paddlepaddle/CMakeLists.txt
index a14011039a26c1..cc1a4eba9cea4e 100644
--- a/ngraph/frontend/paddlepaddle/CMakeLists.txt
+++ b/ngraph/frontend/paddlepaddle/CMakeLists.txt
@@ -70,7 +70,7 @@ endif()
 
 link_system_libraries(${TARGET_NAME} PRIVATE ${Protobuf_LIBRARIES})
 
-target_link_libraries(${TARGET_NAME} PUBLIC frontend_manager
+target_link_libraries(${TARGET_NAME} PRIVATE ngraph::frontend_manager::static
                                      PRIVATE ngraph::builder)
 
 add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}
diff --git a/ngraph/python/tests/mock/mock_py_ngraph_frontend/CMakeLists.txt b/ngraph/python/tests/mock/mock_py_ngraph_frontend/CMakeLists.txt
index cbae0eafd0659d..245947d4cac5ed 100644
--- a/ngraph/python/tests/mock/mock_py_ngraph_frontend/CMakeLists.txt
+++ b/ngraph/python/tests/mock/mock_py_ngraph_frontend/CMakeLists.txt
@@ -15,6 +15,6 @@ add_library(${TARGET_FE_NAME} SHARED ${LIBRARY_SRC} ${LIBRARY_HEADERS})
 
 target_include_directories(${TARGET_FE_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
 
-target_link_libraries(${TARGET_FE_NAME} PUBLIC ngraph::frontend_manager)
+target_link_libraries(${TARGET_FE_NAME} PRIVATE ngraph::frontend_manager::static)
 
 add_clang_format_target(${TARGET_FE_NAME}_clang FOR_TARGETS ${TARGET_FE_NAME})
diff --git a/ngraph/python/tests/mock/pyngraph_fe_mock_api/CMakeLists.txt b/ngraph/python/tests/mock/pyngraph_fe_mock_api/CMakeLists.txt
index a371a491dc2b0e..c8300df3d8797e 100644
--- a/ngraph/python/tests/mock/pyngraph_fe_mock_api/CMakeLists.txt
+++ b/ngraph/python/tests/mock/pyngraph_fe_mock_api/CMakeLists.txt
@@ -11,6 +11,6 @@ source_group("src" FILES ${PYBIND_FE_SRC})
 
 pybind11_add_module(${PYBIND_FE_NAME} MODULE ${PYBIND_FE_SRC})
 
-target_link_libraries(${PYBIND_FE_NAME} PRIVATE ${TARGET_FE_NAME})
+target_link_libraries(${PYBIND_FE_NAME} PRIVATE ${TARGET_FE_NAME} ngraph::frontend_manager::static)
 
 add_clang_format_target(${PYBIND_FE_NAME}_clang FOR_TARGETS ${PYBIND_FE_NAME})
diff --git a/scripts/setupvars/setupvars.bat b/scripts/setupvars/setupvars.bat
index f58bc1bd2cb6a1..c7920d3fa332eb 100644
--- a/scripts/setupvars/setupvars.bat
+++ b/scripts/setupvars/setupvars.bat
@@ -68,6 +68,7 @@ set "TBB_DIR=%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\tbb
 if exist %INTEL_OPENVINO_DIR%\deployment_tools\ngraph (
 set "OPENVINO_LIB_PATHS=%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib;%OPENVINO_LIB_PATHS%"
 set "ngraph_DIR=%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\cmake"
+set "OV_FRONTEND_PATH=%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib;%OV_FRONTEND_PATH%"
 )
 
 :: Compile tool
diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh
index 880ec2d0633590..199ef0edd9355f 100755
--- a/scripts/setupvars/setupvars.sh
+++ b/scripts/setupvars/setupvars.sh
@@ -68,6 +68,7 @@ fi
 if [ -e "$INSTALLDIR/deployment_tools/ngraph" ]; then
     export LD_LIBRARY_PATH=$INSTALLDIR/deployment_tools/ngraph/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
     export ngraph_DIR=$INSTALLDIR/deployment_tools/ngraph/cmake
+    export OV_FRONTEND_PATH=$INSTALLDIR/deployment_tools/ngraph/lib${OV_FRONTEND_PATH:+:$OV_FRONTEND_PATH}
 fi
 
 if [ -e "$INSTALLDIR/opencv" ]; then
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index a65fdb2e56680d..d35947dca9e38b 100644
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -158,7 +158,7 @@ if(NGRAPH_PDPD_FRONTEND_ENABLE OR NGRAPH_ONNX_IMPORT_ENABLE)
         if(NGRAPH_USE_SYSTEM_PROTOBUF)
             set(link_type INTERFACE)
         endif()
-        if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
+        if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
             target_compile_options(${target} ${link_type} -Wno-undef)
         endif()
     endforeach()
diff --git a/thirdparty/protobuf/CMakeLists.txt b/thirdparty/protobuf/CMakeLists.txt
index c4d6fe26bc2af6..f3cec9914a16f5 100644
--- a/thirdparty/protobuf/CMakeLists.txt
+++ b/thirdparty/protobuf/CMakeLists.txt
@@ -63,6 +63,8 @@ if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
     if(TARGET libprotoc)
         list(APPEND _proto_libs libprotoc)
         target_compile_options(libprotoc PRIVATE -Wno-all -Wno-unused-variable)
+        # libprotobuf is always built for protoc
+        target_compile_options(libprotobuf PUBLIC -Wno-undef)
     endif()
     set_target_properties(${_proto_libs} PROPERTIES
         CXX_VISIBILITY_PRESET default